{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Pin the process to one GPU. The device mask is set before the GPU-backed\n",
    "# libraries below are imported so it is already in place whenever one of\n",
    "# them first initialises CUDA.\n",
    "os.environ[\"CUDA_DEVICE_ORDER\"] = \"PCI_BUS_ID\"  # enumerate GPUs by PCI bus id\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"3\"  # NOTE(review): machine-specific GPU index\n",
    "\n",
    "# NOTE(review): numpy and fetch_20newsgroups are not used in the cells visible\n",
    "# here; kept in case later cells rely on them.\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from bertopic import BERTopic\n",
    "from sklearn.datasets import fetch_20newsgroups"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Topic</th>\n",
       "      <th>Count</th>\n",
       "      <th>Name</th>\n",
       "      <th>Representation</th>\n",
       "      <th>Llama2</th>\n",
       "      <th>Representative_Docs</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1</td>\n",
       "      <td>2160</td>\n",
       "      <td>-1_growth_mergers_finance_economic growth</td>\n",
       "      <td>['growth', 'mergers', 'finance', 'economic gro...</td>\n",
       "      <td>['Business and Economic Growth', '', '', '', '...</td>\n",
       "      <td>['mergers, acquisitions, dividends', 'antitrus...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>518</td>\n",
       "      <td>0_oil_oil prices_energy_gas</td>\n",
       "      <td>['oil', 'oil prices', 'energy', 'gas', 'prices...</td>\n",
       "      <td>['Oil prices and their impact on the energy in...</td>\n",
       "      <td>['oil, prices', 'oil, prices, firms', 'oil pri...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>199</td>\n",
       "      <td>1_takeover_shares_ownership_securities</td>\n",
       "      <td>['takeover', 'shares', 'ownership', 'securitie...</td>\n",
       "      <td>['Corporate takeovers and share ownership', ''...</td>\n",
       "      <td>['takeover, tender offer, shares', 'takeover, ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>126</td>\n",
       "      <td>2_acquisitions debt_acquisitions_acquisitions ...</td>\n",
       "      <td>['acquisitions debt', 'acquisitions', 'acquisi...</td>\n",
       "      <td>['Corporate finance and acquisitions', '', '',...</td>\n",
       "      <td>['acquisitions, debt, downgrade', 'finance, ac...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3</td>\n",
       "      <td>124</td>\n",
       "      <td>3_dividend_dividend finance_dividends_finance ...</td>\n",
       "      <td>['dividend', 'dividend finance', 'dividends', ...</td>\n",
       "      <td>['Dividend Finance', '', '', '', '', '', '', '...</td>\n",
       "      <td>['dividend, finance', 'dividend, finance', 'di...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>138</th>\n",
       "      <td>137</td>\n",
       "      <td>21</td>\n",
       "      <td>137_soybeans_cotton_agriculture soybeans_trade...</td>\n",
       "      <td>['soybeans', 'cotton', 'agriculture soybeans',...</td>\n",
       "      <td>['Soybean trade and regulation', '', '', '', '...</td>\n",
       "      <td>['agriculture, soybeans, trade', 'imports, soy...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>139</th>\n",
       "      <td>138</td>\n",
       "      <td>21</td>\n",
       "      <td>138_budget deficit_deficit_budget_cuts</td>\n",
       "      <td>['budget deficit', 'deficit', 'budget', 'cuts'...</td>\n",
       "      <td>['Fiscal policy and government spending', '', ...</td>\n",
       "      <td>['budget, deficit, taxes', 'budget, deficit, g...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>140</th>\n",
       "      <td>139</td>\n",
       "      <td>21</td>\n",
       "      <td>139_energy_energy prices_economy energy_prices...</td>\n",
       "      <td>['energy', 'energy prices', 'economy energy', ...</td>\n",
       "      <td>['Energy market and prices', '', '', '', '', '...</td>\n",
       "      <td>['economy, energy, prices', 'economy, energy, ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>141</th>\n",
       "      <td>140</td>\n",
       "      <td>21</td>\n",
       "      <td>140_loan_loans_losses_loan loss</td>\n",
       "      <td>['loan', 'loans', 'losses', 'loan loss', 'prov...</td>\n",
       "      <td>['Financial performance and loan losses', '', ...</td>\n",
       "      <td>['earnings, loan loss provisions, financial re...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>142</th>\n",
       "      <td>141</td>\n",
       "      <td>21</td>\n",
       "      <td>141_exports trade_rates central_central bank_a...</td>\n",
       "      <td>['exports trade', 'rates central', 'central ba...</td>\n",
       "      <td>['Monetary Policy and Central Banks', '', '', ...</td>\n",
       "      <td>['monetary policy, interest rates, central ban...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>143 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Topic  Count                                               Name  \\\n",
       "0       -1   2160          -1_growth_mergers_finance_economic growth   \n",
       "1        0    518                        0_oil_oil prices_energy_gas   \n",
       "2        1    199             1_takeover_shares_ownership_securities   \n",
       "3        2    126  2_acquisitions debt_acquisitions_acquisitions ...   \n",
       "4        3    124  3_dividend_dividend finance_dividends_finance ...   \n",
       "..     ...    ...                                                ...   \n",
       "138    137     21  137_soybeans_cotton_agriculture soybeans_trade...   \n",
       "139    138     21             138_budget deficit_deficit_budget_cuts   \n",
       "140    139     21  139_energy_energy prices_economy energy_prices...   \n",
       "141    140     21                    140_loan_loans_losses_loan loss   \n",
       "142    141     21  141_exports trade_rates central_central bank_a...   \n",
       "\n",
       "                                        Representation  \\\n",
       "0    ['growth', 'mergers', 'finance', 'economic gro...   \n",
       "1    ['oil', 'oil prices', 'energy', 'gas', 'prices...   \n",
       "2    ['takeover', 'shares', 'ownership', 'securitie...   \n",
       "3    ['acquisitions debt', 'acquisitions', 'acquisi...   \n",
       "4    ['dividend', 'dividend finance', 'dividends', ...   \n",
       "..                                                 ...   \n",
       "138  ['soybeans', 'cotton', 'agriculture soybeans',...   \n",
       "139  ['budget deficit', 'deficit', 'budget', 'cuts'...   \n",
       "140  ['energy', 'energy prices', 'economy energy', ...   \n",
       "141  ['loan', 'loans', 'losses', 'loan loss', 'prov...   \n",
       "142  ['exports trade', 'rates central', 'central ba...   \n",
       "\n",
       "                                                Llama2  \\\n",
       "0    ['Business and Economic Growth', '', '', '', '...   \n",
       "1    ['Oil prices and their impact on the energy in...   \n",
       "2    ['Corporate takeovers and share ownership', ''...   \n",
       "3    ['Corporate finance and acquisitions', '', '',...   \n",
       "4    ['Dividend Finance', '', '', '', '', '', '', '...   \n",
       "..                                                 ...   \n",
       "138  ['Soybean trade and regulation', '', '', '', '...   \n",
       "139  ['Fiscal policy and government spending', '', ...   \n",
       "140  ['Energy market and prices', '', '', '', '', '...   \n",
       "141  ['Financial performance and loan losses', '', ...   \n",
       "142  ['Monetary Policy and Central Banks', '', '', ...   \n",
       "\n",
       "                                   Representative_Docs  \n",
       "0    ['mergers, acquisitions, dividends', 'antitrus...  \n",
       "1    ['oil, prices', 'oil, prices, firms', 'oil pri...  \n",
       "2    ['takeover, tender offer, shares', 'takeover, ...  \n",
       "3    ['acquisitions, debt, downgrade', 'finance, ac...  \n",
       "4    ['dividend, finance', 'dividend, finance', 'di...  \n",
       "..                                                 ...  \n",
       "138  ['agriculture, soybeans, trade', 'imports, soy...  \n",
       "139  ['budget, deficit, taxes', 'budget, deficit, g...  \n",
       "140  ['economy, energy, prices', 'economy, energy, ...  \n",
       "141  ['earnings, loan loss provisions, financial re...  \n",
       "142  ['monetary policy, interest rates, central ban...  \n",
       "\n",
       "[143 rows x 6 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv ('../../datasets/Reuters-21578/bertopic_result/50topic_info_3000.csv')\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import ast\n",
    "\n",
    "# Each 'Representation' cell read back from the CSV is the text form of a\n",
    "# Python list of strings, e.g. ['oil', 'oil prices', ...]. Parse it with\n",
    "# ast.literal_eval instead of slicing and splitting on ',': the manual\n",
    "# approach truncates a first entry that itself contains a comma and leaves\n",
    "# escape sequences undecoded.\n",
    "# An empty list yields '' (what the slicing code produced for '[]').\n",
    "# A cell that is not a valid literal now raises instead of silently\n",
    "# producing a mangled label.\n",
    "rep_label = [\n",
    "    (ast.literal_eval(cell) or [''])[0]\n",
    "    for cell in df['Representation']\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import ast\n",
    "\n",
    "# Each 'Llama2' cell read back from the CSV is the text form of a Python\n",
    "# list whose first element is the generated topic label, e.g.\n",
    "# ['Business and Economic Growth', '', '', ...]. Parse it with\n",
    "# ast.literal_eval instead of slicing and splitting on ',': free-text labels\n",
    "# can contain commas (which the split would cut at) or quotes/escapes\n",
    "# (which the slicing would leave undecoded).\n",
    "# An empty list yields '' (what the slicing code produced for '[]').\n",
    "# A cell that is not a valid literal now raises instead of silently\n",
    "# producing a mangled label.\n",
    "llama_label = [\n",
    "    (ast.literal_eval(cell) or [''])[0]\n",
    "    for cell in df['Llama2']\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read under a context manager so the file handle is closed as soon as the\n",
    "# lines are loaded (the bare open() used before was never closed).\n",
    "with open('../../datasets/Reuters-21578/test_raw_texts.txt', 'r') as file1:\n",
    "    # Keep only the first 1000 lines; each keeps its trailing newline.\n",
    "    # Presumably one document per line -- confirm against the dataset.\n",
    "    documents = file1.readlines()[:1000]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f6f85dd877ba4440bfad6c44a5757ca5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Batches:   0%|          | 0/32 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from sentence_transformers import SentenceTransformer\n",
    "\n",
    "# Pre-calculate embeddings\n",
    "embedding_model = SentenceTransformer(\"all-MiniLM-L6-v2\")\n",
    "embeddings = embedding_model.encode(documents, show_progress_bar=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "from umap import UMAP\n",
    "\n",
    "# Dimensionality-reduction sub-model handed to BERTopic: 5 output\n",
    "# components, cosine metric, seeded via random_state.\n",
    "umap_model = UMAP(\n",
    "    n_neighbors=5,\n",
    "    n_components=5,\n",
    "    min_dist=0.0,\n",
    "    metric='cosine',\n",
    "    random_state=42,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "from hdbscan import HDBSCAN\n",
    "\n",
    "# Clustering sub-model handed to BERTopic: clusters need at least 20\n",
    "# members, distances are euclidean, and prediction data is generated.\n",
    "hdbscan_model = HDBSCAN(\n",
    "    min_cluster_size=20,\n",
    "    metric='euclidean',\n",
    "    cluster_selection_method='eom',\n",
    "    prediction_data=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "\n",
    "# Term-counting sub-model handed to BERTopic: English stop words removed,\n",
    "# unigrams and bigrams, terms must occur in at least 2 documents.\n",
    "vectorizer_model = CountVectorizer(\n",
    "    stop_words=\"english\",\n",
    "    min_df=2,\n",
    "    ngram_range=(1, 2),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The labels extracted from the 'Llama2' column serve as the candidate\n",
    "# topics for zero-shot assignment. This binds the same list object as\n",
    "# llama_label (an alias, not a copy).\n",
    "zeroshot_topic_list = llama_label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-01-18 10:04:45,177 - BERTopic - Zeroshot Step 1 - Finding documents that could be assigned to either one of the zero-shot topics\n",
      "2024-01-18 10:04:45,865 - BERTopic - Zeroshot Step 2 - Clustering documents that were not found in the zero-shot model...\n",
      "2024-01-18 10:04:45,868 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm\n",
      "2024-01-18 10:04:45,868 - BERTopic - Dimensionality - Completed ✓\n",
      "2024-01-18 10:04:45,870 - BERTopic - Cluster - Start clustering the reduced embeddings\n",
      "2024-01-18 10:04:45,872 - BERTopic - Cluster - Completed ✓\n",
      "2024-01-18 10:04:45,876 - BERTopic - Representation - Extracting topics from clusters using representation models.\n",
      "2024-01-18 10:04:46,240 - BERTopic - Representation - Completed ✓\n",
      "2024-01-18 10:04:46,557 - BERTopic - Zeroshot Step 2 - Completed ✓\n",
      "2024-01-18 10:04:46,558 - BERTopic - Zeroshot Step 3 - Combining clustered topics with the zeroshot model\n"
     ]
    }
   ],
   "source": [
    "topic_model = BERTopic(\n",
    "\n",
    "  # Sub-models\n",
    "  embedding_model=embedding_model,\n",
    "  umap_model=umap_model,\n",
    "  hdbscan_model=hdbscan_model,\n",
    "  vectorizer_model=vectorizer_model,\n",
    "  zeroshot_topic_list=zeroshot_topic_list,\n",
    "  zeroshot_min_similarity=.05,\n",
    "\n",
    "  # Hyperparameters\n",
    "  top_n_words=10,\n",
    "  verbose=True,\n",
    "  calculate_probabilities=True,\n",
    ")\n",
    "\n",
    "# Train model\n",
    "topics, probs = topic_model.fit_transform(documents, embeddings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Topic</th>\n",
       "      <th>Count</th>\n",
       "      <th>Name</th>\n",
       "      <th>Representation</th>\n",
       "      <th>Representative_Docs</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>103</td>\n",
       "      <td>Earnings from Discontinued Operations</td>\n",
       "      <td>[dlrs, 000 dlrs, 000, quarter, loss, lt, net, ...</td>\n",
       "      <td>[XEBEC &amp;lt;XEBC&gt; TO REPORT 2ND QTR LOSS  Xebec...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>55</td>\n",
       "      <td>Telecom earnings quarterly results</td>\n",
       "      <td>[cts, vs, cts vs, shr, net, 000 vs, revs, 000,...</td>\n",
       "      <td>[ELECTRO RENT CORP &amp;lt;ELRC&gt; 3RD QTR FEB 28 NE...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>45</td>\n",
       "      <td>Financial instruments and interest rates</td>\n",
       "      <td>[days, 00 pct, rate, pct, net, billion, mln dl...</td>\n",
       "      <td>[J.P. MORGAN &amp;lt;JPM&gt; NET HURT BY BRAZIL, TRAD...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>39</td>\n",
       "      <td>Financial Performance and Losses</td>\n",
       "      <td>[loss, vs loss, profit, net loss, vs, shr loss...</td>\n",
       "      <td>[ATLAS CONSOLIDATED MINING &amp;lt;ACMB&gt; 4TH QTR  ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>38</td>\n",
       "      <td>Corporate takeovers and share ownership</td>\n",
       "      <td>[shares, offer, common, stake, stock, dome, ua...</td>\n",
       "      <td>[CRAZY EDDIE &amp;lt;CRZY&gt; SETS DEFENSIVE RIGHTS  ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107</th>\n",
       "      <td>107</td>\n",
       "      <td>1</td>\n",
       "      <td>Corporate Leadership Changes</td>\n",
       "      <td>[shr 37, 18 dlrs, 37 dlrs, general electric, v...</td>\n",
       "      <td>[GENERAL ELECTRIC CO 1ST QTR SHR 1.37 DLRS VS ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>108</th>\n",
       "      <td>108</td>\n",
       "      <td>1</td>\n",
       "      <td>Corporate earnings and dividends</td>\n",
       "      <td>[tax gain, include tax, stock distribution, 77...</td>\n",
       "      <td>[DOW JONES AND CO INC &amp;lt;DJ&gt; 1ST QTR NET  Shr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>109</th>\n",
       "      <td>109</td>\n",
       "      <td>1</td>\n",
       "      <td>Economic Sanctions and Trade Restrictions in S...</td>\n",
       "      <td>[court press, fitzwater, press, sanctions, eff...</td>\n",
       "      <td>[WHITE HOUSE STANDING FIRM ON JAPANESE SANCTIO...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>110</th>\n",
       "      <td>110</td>\n",
       "      <td>1</td>\n",
       "      <td>Government Subsidies in Agricultural Trade</td>\n",
       "      <td>[retail, maize, price, imf, zambian, price mai...</td>\n",
       "      <td>[ZAMBIA DOES NOT PLAN RETAIL MAIZE PRICE HIKE ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>111</th>\n",
       "      <td>111</td>\n",
       "      <td>1</td>\n",
       "      <td>Corporate financial performance</td>\n",
       "      <td>[295 000, vs 295, 44 cts, new england, 499 000...</td>\n",
       "      <td>[NEW ENGLAND SAVINGS BANK &amp;lt;NESB&gt; 1ST QTR NE...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>112 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Topic  Count                                               Name  \\\n",
       "0        0    103              Earnings from Discontinued Operations   \n",
       "1        1     55                 Telecom earnings quarterly results   \n",
       "2        2     45           Financial instruments and interest rates   \n",
       "3        3     39                   Financial Performance and Losses   \n",
       "4        4     38            Corporate takeovers and share ownership   \n",
       "..     ...    ...                                                ...   \n",
       "107    107      1                       Corporate Leadership Changes   \n",
       "108    108      1                   Corporate earnings and dividends   \n",
       "109    109      1  Economic Sanctions and Trade Restrictions in S...   \n",
       "110    110      1         Government Subsidies in Agricultural Trade   \n",
       "111    111      1                    Corporate financial performance   \n",
       "\n",
       "                                        Representation  \\\n",
       "0    [dlrs, 000 dlrs, 000, quarter, loss, lt, net, ...   \n",
       "1    [cts, vs, cts vs, shr, net, 000 vs, revs, 000,...   \n",
       "2    [days, 00 pct, rate, pct, net, billion, mln dl...   \n",
       "3    [loss, vs loss, profit, net loss, vs, shr loss...   \n",
       "4    [shares, offer, common, stake, stock, dome, ua...   \n",
       "..                                                 ...   \n",
       "107  [shr 37, 18 dlrs, 37 dlrs, general electric, v...   \n",
       "108  [tax gain, include tax, stock distribution, 77...   \n",
       "109  [court press, fitzwater, press, sanctions, eff...   \n",
       "110  [retail, maize, price, imf, zambian, price mai...   \n",
       "111  [295 000, vs 295, 44 cts, new england, 499 000...   \n",
       "\n",
       "                                   Representative_Docs  \n",
       "0    [XEBEC &lt;XEBC> TO REPORT 2ND QTR LOSS  Xebec...  \n",
       "1    [ELECTRO RENT CORP &lt;ELRC> 3RD QTR FEB 28 NE...  \n",
       "2    [J.P. MORGAN &lt;JPM> NET HURT BY BRAZIL, TRAD...  \n",
       "3    [ATLAS CONSOLIDATED MINING &lt;ACMB> 4TH QTR  ...  \n",
       "4    [CRAZY EDDIE &lt;CRZY> SETS DEFENSIVE RIGHTS  ...  \n",
       "..                                                 ...  \n",
       "107  [GENERAL ELECTRIC CO 1ST QTR SHR 1.37 DLRS VS ...  \n",
       "108  [DOW JONES AND CO INC &lt;DJ> 1ST QTR NET  Shr...  \n",
       "109  [WHITE HOUSE STANDING FIRM ON JAPANESE SANCTIO...  \n",
       "110  [ZAMBIA DOES NOT PLAN RETAIL MAIZE PRICE HIKE ...  \n",
       "111  [NEW ENGLAND SAVINGS BANK &lt;NESB> 1ST QTR NE...  \n",
       "\n",
       "[112 rows x 5 columns]"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "topic_model.get_topic_info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Document</th>\n",
       "      <th>Topic</th>\n",
       "      <th>Name</th>\n",
       "      <th>Representation</th>\n",
       "      <th>Representative_Docs</th>\n",
       "      <th>Top_n_words</th>\n",
       "      <th>Representative_document</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ASIAN EXPORTERS FEAR DAMAGE FROM U.S.-JAPAN RI...</td>\n",
       "      <td>57</td>\n",
       "      <td>International Trade and Exports</td>\n",
       "      <td>[trade, steel, exports, imports, japan, steel ...</td>\n",
       "      <td>[CANADA PLANS TO MONITOR STEEL IMPORTS, EXPORT...</td>\n",
       "      <td>trade - steel - exports - imports - japan - st...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>CHINA DAILY SAYS VERMIN EAT 7-12 PCT GRAIN STO...</td>\n",
       "      <td>71</td>\n",
       "      <td>Agricultural Production and Disease</td>\n",
       "      <td>[hectares, mln hectares, china, dry spell, dry...</td>\n",
       "      <td>[CHINA DAILY SAYS VERMIN EAT 7-12 PCT GRAIN ST...</td>\n",
       "      <td>hectares - mln hectares - china - dry spell - ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>JAPAN TO REVISE LONG-TERM ENERGY DEMAND DOWNWA...</td>\n",
       "      <td>89</td>\n",
       "      <td>Energy market and prices</td>\n",
       "      <td>[energy, miti, demand, revise, natural, power,...</td>\n",
       "      <td>[JAPAN TO REVISE LONG-TERM ENERGY DEMAND DOWNW...</td>\n",
       "      <td>energy - miti - demand - revise - natural - po...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>THAI TRADE DEFICIT WIDENS IN FIRST QUARTER  Th...</td>\n",
       "      <td>5</td>\n",
       "      <td>Agricultural Exports</td>\n",
       "      <td>[nil, nil nil, 87, 09, 09 87, tonnes, 1986 87,...</td>\n",
       "      <td>[WORLD SUPPLY/DEMAND ESTIMATES ISSUED BY USDA ...</td>\n",
       "      <td>nil - nil nil - 87 - 09 - 09 87 - tonnes - 198...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>INDONESIA SEES CPO PRICE RISING SHARPLY  Indon...</td>\n",
       "      <td>30</td>\n",
       "      <td>OPEC oil production and prices</td>\n",
       "      <td>[prices, producer prices, crude, cts barrel, s...</td>\n",
       "      <td>[GERMAN PRODUCER PRICES FALL 0.1 PCT IN MARCH ...</td>\n",
       "      <td>prices - producer prices - crude - cts barrel ...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>995</th>\n",
       "      <td>HONEYWELL INC &amp;lt;HON&gt; 1ST QTR OPER NET  Oper ...</td>\n",
       "      <td>29</td>\n",
       "      <td>Aviation Financing and Technology</td>\n",
       "      <td>[showboat, atlantic, fleet, hotel, quarter, ch...</td>\n",
       "      <td>[ATLANTIC FINANCIAL &amp;lt;ATLF.O&gt; TO ACQUIRE S A...</td>\n",
       "      <td>showboat - atlantic - fleet - hotel - quarter ...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>996</th>\n",
       "      <td>WALL STREET STOCKS/BROWNING FERRIS &amp;lt;BFI&gt;  T...</td>\n",
       "      <td>8</td>\n",
       "      <td>Corporate actions: stock splits and earnings</td>\n",
       "      <td>[split, stock split, stock, mln vs, shares, vs...</td>\n",
       "      <td>[MAYFAIR SUPER MARKETS INC &amp;lt;MYFRA&gt; 2ND QTR ...</td>\n",
       "      <td>split - stock split - stock - mln vs - shares ...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>997</th>\n",
       "      <td>HOMESTAKE &amp;lt;HM&gt; MULLS BUYING ORE RESERVES  H...</td>\n",
       "      <td>37</td>\n",
       "      <td>Mining and Gold Reserves</td>\n",
       "      <td>[gold, atlas, exploration, tons, 000 tons, min...</td>\n",
       "      <td>[GORDEX MINERALS LOCATES CANADA GOLD DEPOSITS ...</td>\n",
       "      <td>gold - atlas - exploration - tons - 000 tons -...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>998</th>\n",
       "      <td>CHRONAR CORP &amp;lt;CRNR.O&gt; YEAR LOSS  Shr loss 9...</td>\n",
       "      <td>0</td>\n",
       "      <td>Earnings from Discontinued Operations</td>\n",
       "      <td>[dlrs, 000 dlrs, 000, quarter, loss, lt, net, ...</td>\n",
       "      <td>[XEBEC &amp;lt;XEBC&gt; TO REPORT 2ND QTR LOSS  Xebec...</td>\n",
       "      <td>dlrs - 000 dlrs - 000 - quarter - loss - lt - ...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>999</th>\n",
       "      <td>EMPIRE &amp;lt;EMPA.TO&gt; TO BUY SOBEYS &amp;lt;SYSA.TO&gt;...</td>\n",
       "      <td>0</td>\n",
       "      <td>Earnings from Discontinued Operations</td>\n",
       "      <td>[dlrs, 000 dlrs, 000, quarter, loss, lt, net, ...</td>\n",
       "      <td>[XEBEC &amp;lt;XEBC&gt; TO REPORT 2ND QTR LOSS  Xebec...</td>\n",
       "      <td>dlrs - 000 dlrs - 000 - quarter - loss - lt - ...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1000 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                              Document  Topic  \\\n",
       "0    ASIAN EXPORTERS FEAR DAMAGE FROM U.S.-JAPAN RI...     57   \n",
       "1    CHINA DAILY SAYS VERMIN EAT 7-12 PCT GRAIN STO...     71   \n",
       "2    JAPAN TO REVISE LONG-TERM ENERGY DEMAND DOWNWA...     89   \n",
       "3    THAI TRADE DEFICIT WIDENS IN FIRST QUARTER  Th...      5   \n",
       "4    INDONESIA SEES CPO PRICE RISING SHARPLY  Indon...     30   \n",
       "..                                                 ...    ...   \n",
       "995  HONEYWELL INC &lt;HON> 1ST QTR OPER NET  Oper ...     29   \n",
       "996  WALL STREET STOCKS/BROWNING FERRIS &lt;BFI>  T...      8   \n",
       "997  HOMESTAKE &lt;HM> MULLS BUYING ORE RESERVES  H...     37   \n",
       "998  CHRONAR CORP &lt;CRNR.O> YEAR LOSS  Shr loss 9...      0   \n",
       "999  EMPIRE &lt;EMPA.TO> TO BUY SOBEYS &lt;SYSA.TO>...      0   \n",
       "\n",
       "                                             Name  \\\n",
       "0                 International Trade and Exports   \n",
       "1             Agricultural Production and Disease   \n",
       "2                        Energy market and prices   \n",
       "3                            Agricultural Exports   \n",
       "4                  OPEC oil production and prices   \n",
       "..                                            ...   \n",
       "995             Aviation Financing and Technology   \n",
       "996  Corporate actions: stock splits and earnings   \n",
       "997                      Mining and Gold Reserves   \n",
       "998         Earnings from Discontinued Operations   \n",
       "999         Earnings from Discontinued Operations   \n",
       "\n",
       "                                        Representation  \\\n",
       "0    [trade, steel, exports, imports, japan, steel ...   \n",
       "1    [hectares, mln hectares, china, dry spell, dry...   \n",
       "2    [energy, miti, demand, revise, natural, power,...   \n",
       "3    [nil, nil nil, 87, 09, 09 87, tonnes, 1986 87,...   \n",
       "4    [prices, producer prices, crude, cts barrel, s...   \n",
       "..                                                 ...   \n",
       "995  [showboat, atlantic, fleet, hotel, quarter, ch...   \n",
       "996  [split, stock split, stock, mln vs, shares, vs...   \n",
       "997  [gold, atlas, exploration, tons, 000 tons, min...   \n",
       "998  [dlrs, 000 dlrs, 000, quarter, loss, lt, net, ...   \n",
       "999  [dlrs, 000 dlrs, 000, quarter, loss, lt, net, ...   \n",
       "\n",
       "                                   Representative_Docs  \\\n",
       "0    [CANADA PLANS TO MONITOR STEEL IMPORTS, EXPORT...   \n",
       "1    [CHINA DAILY SAYS VERMIN EAT 7-12 PCT GRAIN ST...   \n",
       "2    [JAPAN TO REVISE LONG-TERM ENERGY DEMAND DOWNW...   \n",
       "3    [WORLD SUPPLY/DEMAND ESTIMATES ISSUED BY USDA ...   \n",
       "4    [GERMAN PRODUCER PRICES FALL 0.1 PCT IN MARCH ...   \n",
       "..                                                 ...   \n",
       "995  [ATLANTIC FINANCIAL &lt;ATLF.O> TO ACQUIRE S A...   \n",
       "996  [MAYFAIR SUPER MARKETS INC &lt;MYFRA> 2ND QTR ...   \n",
       "997  [GORDEX MINERALS LOCATES CANADA GOLD DEPOSITS ...   \n",
       "998  [XEBEC &lt;XEBC> TO REPORT 2ND QTR LOSS  Xebec...   \n",
       "999  [XEBEC &lt;XEBC> TO REPORT 2ND QTR LOSS  Xebec...   \n",
       "\n",
       "                                           Top_n_words  \\\n",
       "0    trade - steel - exports - imports - japan - st...   \n",
       "1    hectares - mln hectares - china - dry spell - ...   \n",
       "2    energy - miti - demand - revise - natural - po...   \n",
       "3    nil - nil nil - 87 - 09 - 09 87 - tonnes - 198...   \n",
       "4    prices - producer prices - crude - cts barrel ...   \n",
       "..                                                 ...   \n",
       "995  showboat - atlantic - fleet - hotel - quarter ...   \n",
       "996  split - stock split - stock - mln vs - shares ...   \n",
       "997  gold - atlas - exploration - tons - 000 tons -...   \n",
       "998  dlrs - 000 dlrs - 000 - quarter - loss - lt - ...   \n",
       "999  dlrs - 000 dlrs - 000 - quarter - loss - lt - ...   \n",
       "\n",
       "     Representative_document  \n",
       "0                       True  \n",
       "1                       True  \n",
       "2                       True  \n",
       "3                      False  \n",
       "4                      False  \n",
       "..                       ...  \n",
       "995                    False  \n",
       "996                    False  \n",
       "997                    False  \n",
       "998                    False  \n",
       "999                    False  \n",
       "\n",
       "[1000 rows x 7 columns]"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = topic_model.get_document_info(documents)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the Reuters-21578 test label file, one raw line per list entry.\n",
    "# A context manager guarantees the handle is closed once the lines are read.\n",
    "with open('../../datasets/Reuters-21578/test_label.txt', 'r') as file1:\n",
    "    labels = file1.readlines()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3019"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One entry per line of labels: the trimmed line split on single spaces.\n",
    "test_set = [raw_line.strip().split(' ') for raw_line in labels]\n",
    "# Show the number of entries as the cell output.\n",
    "len(test_set)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect every distinct space-separated label from labels, in first-seen order.\n",
    "# dict.fromkeys de-duplicates in a single pass while keeping insertion order,\n",
    "# which avoids re-scanning a growing list for every label.\n",
    "true_labels = list(dict.fromkeys(\n",
    "    lb for row in labels for lb in row.strip().split(' ')\n",
    "))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "90"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display how many entries true_labels currently holds.\n",
    "len(true_labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Information Retrieval',\n",
       " 'Methodology',\n",
       " 'Quantum Physics',\n",
       " 'Information Theory',\n",
       " 'Information Theory',\n",
       " 'Applications',\n",
       " 'Computer Vision and Pattern Recognition',\n",
       " 'Computation and Language',\n",
       " 'Artificial Intelligence',\n",
       " 'Numerical Analysis',\n",
       " 'Mathematical Software',\n",
       " 'Cryptography and Security',\n",
       " 'Software Engineering',\n",
       " 'Machine Learning',\n",
       " 'Networking and Internet Architecture',\n",
       " 'Systems and Control',\n",
       " 'Data Structures and Algorithms',\n",
       " 'Computational Complexity',\n",
       " 'Formal Languages and Automata Theory',\n",
       " 'Robotics',\n",
       " 'Optimization and Control',\n",
       " 'Multiagent Systems',\n",
       " 'Performance',\n",
       " 'Social and Information Networks',\n",
       " 'Physics and Society;',\n",
       " 'Distributed, Parallel, and Cluster Computing',\n",
       " 'Databases',\n",
       " 'Combinatorics',\n",
       " 'Machine Learning',\n",
       " 'Probability',\n",
       " 'Neural and Evolutionary Computing',\n",
       " 'Discrete Mathematics',\n",
       " 'Statistical Mechanics;',\n",
       " 'Logic in Computer Science',\n",
       " 'Computers and Society',\n",
       " 'Disordered Systems and Neural Networks;',\n",
       " 'Numerical Analysis',\n",
       " 'Computational Engineering, Finance, and Science',\n",
       " 'Symbolic Computation',\n",
       " 'Computer Science and Game Theory',\n",
       " 'Human-Computer Interaction',\n",
       " 'Quantitative Methods',\n",
       " 'Digital Libraries',\n",
       " 'Neurons and Cognition',\n",
       " 'Computational Geometry',\n",
       " 'Computation and Language;',\n",
       " 'Programming Languages',\n",
       " 'Logic',\n",
       " 'Number Theory',\n",
       " 'Multimedia',\n",
       " 'Statistics Theory',\n",
       " 'Statistics Theory',\n",
       " 'Adaptation and Self-Organizing Systems',\n",
       " 'Statistics and Probability;']"
      ]
     },
     "execution_count": 92,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NOTE(review): assumes every line in labels has at least two '; '-separated\n",
    "# fields -- confirm which file labels was loaded from before re-running.\n",
    "# Keep the second field of each line. A trailing ';' is removed so that e.g.\n",
    "# 'Computation and Language;' and 'Computation and Language' compare equal.\n",
    "true_labels = [row.split('; ')[1].strip().rstrip('; ') for row in labels]\n",
    "true_labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "19658"
      ]
     },
     "execution_count": 144,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the Amazon-531 test labels; the context manager closes the file after reading.\n",
    "with open('../../datasets/Amazon-531/test_label.txt', 'r') as file2:\n",
    "    test_label_set = file2.readlines()\n",
    "# One list of labels per line. A trailing ',' at the end of a line is dropped\n",
    "# before splitting so the last label is not stored as e.g. 'dolls,'.\n",
    "# The per-line value stays inside the comprehension, so the notebook-level\n",
    "# `labels` variable read by other cells is left untouched.\n",
    "test_set = [line.strip().rstrip(',').split(', ') for line in test_label_set]\n",
    "len(test_set)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['toys_games', 'dolls_accessories', 'dolls,'],\n",
       " ['baby_products', 'gifts,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples', 'canned_jarred_food,'],\n",
       " ['beauty', 'skin_care', 'sun,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'tea,'],\n",
       " ['beauty', 'skin_care', 'body,'],\n",
       " ['baby_products', 'gear', 'baby_gyms_playmats,'],\n",
       " ['health_personal_care', 'personal_care', 'oral_hygiene,'],\n",
       " ['toys_games', 'dress_up_pretend_play', 'pretend_play,'],\n",
       " ['health_personal_care', 'personal_care', 'lip_care_products,'],\n",
       " ['beauty', 'bath_body', 'bath,'],\n",
       " ['pet_supplies', 'dogs', 'feeding_watering_supplies,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['health_personal_care', 'health_care', 'massage_relaxation,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['beauty', 'hair_care', 'hair_perms_texturizers,'],\n",
       " ['toys_games', 'dolls_accessories', 'doll_accessories,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['beauty', 'skin_care', 'body,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['toys_games', 'puzzles', 'jigsaw_puzzles,'],\n",
       " ['baby_products', 'car_seats_accessories', 'accessories,'],\n",
       " ['beauty', 'bath_body', 'cleansers,'],\n",
       " ['baby_products', 'diapering', 'diaper_bags,'],\n",
       " ['toys_games', 'baby_toddler_toys,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['toys_games', 'dolls_accessories', 'dolls,'],\n",
       " ['health_personal_care', 'household_supplies,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['toys_games', 'action_toy_figures', 'figures,'],\n",
       " ['baby_products', 'diapering', 'disposable_diapers,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['beauty', 'fragrance', 'men_s,'],\n",
       " ['toys_games', 'baby_toddler_toys', 'music_sound,'],\n",
       " ['toys_games', 'building_toys,'],\n",
       " ['beauty', 'makeup', 'eyes,'],\n",
       " ['toys_games', 'games', 'card_games,'],\n",
       " ['baby_products', 'nursery', 'bedding,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['toys_games', 'puzzles', 'pegged_puzzles,'],\n",
       " ['baby_products', 'safety', 'gates_doorways,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples,'],\n",
       " ['beauty', 'makeup', 'nails,'],\n",
       " ['baby_products', 'pregnancy_maternity', 'maternity_pillows,'],\n",
       " ['toys_games', 'dolls_accessories', 'playsets,'],\n",
       " ['beauty', 'tools_accessories', 'mirrors,'],\n",
       " ['baby_products', 'feeding', 'highchairs_booster_seats,'],\n",
       " ['beauty', 'tools_accessories', 'mirrors,'],\n",
       " ['health_personal_care', 'health_care', 'cough_cold,'],\n",
       " ['toys_games', 'action_toy_figures', 'playsets,'],\n",
       " ['beauty', 'skin_care', 'body,'],\n",
       " ['toys_games', 'electronics_for_kids,'],\n",
       " ['toys_games', 'building_toys', 'building_sets,'],\n",
       " ['toys_games', 'baby_toddler_toys,'],\n",
       " ['beauty', 'hair_care', 'hair_color,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'daily_living_aids,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'health_monitors,'],\n",
       " ['toys_games', 'electronics_for_kids', 'systems_accessories,'],\n",
       " ['toys_games', 'hobbies', 'model_building_kits_tools,'],\n",
       " ['beauty', 'hair_care', 'conditioners,'],\n",
       " ['grocery_gourmet_food', 'snack_food', 'cookies,'],\n",
       " ['toys_games', 'novelty_gag_toys', 'magic_kits_accessories,'],\n",
       " ['toys_games', 'arts_crafts,'],\n",
       " ['toys_games', 'learning_education,'],\n",
       " ['baby_products', 'diapering', 'diaper_pails_refills,'],\n",
       " ['pet_supplies', 'cats', 'grooming,'],\n",
       " ['toys_games', 'dolls_accessories', 'dolls,'],\n",
       " ['toys_games', 'baby_toddler_toys', 'bath_toys,'],\n",
       " ['beauty', 'hair_care', 'styling_products,'],\n",
       " ['beauty', 'hair_care', 'shampoo_plus_conditioner,'],\n",
       " ['baby_products', 'gear', 'swings,'],\n",
       " ['baby_products', 'gear', 'baby_gyms_playmats,'],\n",
       " ['beauty', 'fragrance', 'women_s,'],\n",
       " ['grocery_gourmet_food', 'candy_chocolate', 'suckers_lollipops,'],\n",
       " ['toys_games', 'stuffed_animals_plush', 'animals_figures,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['health_personal_care', 'personal_care', 'oral_hygiene,'],\n",
       " ['beauty', 'fragrance', 'women_s,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['toys_games', 'dolls_accessories', 'doll_accessories,'],\n",
       " ['beauty', 'makeup', 'nails,'],\n",
       " ['pet_supplies', 'fish_aquatic_pets', 'pumps_filters,'],\n",
       " ['health_personal_care', 'personal_care', 'oral_hygiene,'],\n",
       " ['health_personal_care', 'health_care', 'pill_cases_splitters,'],\n",
       " ['baby_products', 'diapering', 'disposable_diapers,'],\n",
       " ['baby_products', 'gear', 'swings,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['toys_games', 'learning_education', 'science,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['beauty', 'fragrance', 'men_s,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples,'],\n",
       " ['toys_games', 'games', 'board_games,'],\n",
       " ['health_personal_care', 'household_supplies', 'cleaning_tools,'],\n",
       " ['baby_products', 'bathing_skin_care', 'gift_sets,'],\n",
       " ['health_personal_care', 'personal_care', 'oral_hygiene,'],\n",
       " ['baby_products', 'nursery', 'bedding,'],\n",
       " ['beauty', 'bath_body', 'cleansers,'],\n",
       " ['health_personal_care', 'health_care', 'sleep_snoring,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'weight_loss_products,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'health_monitors,'],\n",
       " ['health_personal_care', 'personal_care', 'oral_hygiene,'],\n",
       " ['toys_games', 'vehicles_remote_control', 'play_trains_railway_sets,'],\n",
       " ['beauty', 'hair_care', 'hair_loss_products,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['health_personal_care', 'personal_care', 'oral_hygiene,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'tea,'],\n",
       " ['toys_games', 'puzzles', 'pegged_puzzles,'],\n",
       " ['pet_supplies', 'dogs', 'health_supplies,'],\n",
       " ['toys_games', 'games', 'board_games,'],\n",
       " ['toys_games', 'stuffed_animals_plush,'],\n",
       " ['beauty', 'fragrance', 'women_s,'],\n",
       " ['pet_supplies', 'dogs', 'training_behavior_aids,'],\n",
       " ['toys_games', 'dress_up_pretend_play', 'pretend_play,'],\n",
       " ['beauty', 'fragrance', 'men_s,'],\n",
       " ['health_personal_care',\n",
       "  'medical_supplies_equipment',\n",
       "  'bathroom_aids_safety,'],\n",
       " ['toys_games', 'stuffed_animals_plush', 'animals_figures,'],\n",
       " ['toys_games', 'tricycles', 'scooters_wagons,'],\n",
       " ['toys_games', 'dress_up_pretend_play', 'pretend_play,'],\n",
       " ['beauty', 'makeup', 'eyes,'],\n",
       " ['baby_products', 'gear', 'swings,'],\n",
       " ['toys_games', 'games', 'board_games,'],\n",
       " ['toys_games', 'baby_toddler_toys', 'activity_play_centers,'],\n",
       " ['grocery_gourmet_food',\n",
       "  'fresh_flowers_live_indoor_plants',\n",
       "  'live_indoor_plants,'],\n",
       " ['health_personal_care', 'health_care', 'therapeutic_skin_care,'],\n",
       " ['grocery_gourmet_food', 'snack_food', 'party_mix,'],\n",
       " ['health_personal_care', 'personal_care', 'lip_care_products,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['pet_supplies', 'cats', 'health_supplies,'],\n",
       " ['health_personal_care', 'health_care', 'allergy,'],\n",
       " ['baby_products', 'nursery,'],\n",
       " ['toys_games', 'electronics_for_kids', 'systems_accessories,'],\n",
       " ['health_personal_care', 'health_care', 'first_aid,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples,'],\n",
       " ['beauty', 'bath_body', 'cleansers,'],\n",
       " ['grocery_gourmet_food', 'candy_chocolate', 'gummy_candies,'],\n",
       " ['pet_supplies', 'cats', 'litter_housebreaking,'],\n",
       " ['baby_products', 'safety', 'monitors,'],\n",
       " ['toys_games', 'sports_outdoor_play', 'play_tents_tunnels,'],\n",
       " ['pet_supplies', 'dogs', 'carriers_travel_products,'],\n",
       " ['toys_games', 'baby_toddler_toys', 'blocks,'],\n",
       " ['health_personal_care', 'health_care', 'first_aid,'],\n",
       " ['beauty', 'makeup', 'nails,'],\n",
       " ['toys_games', 'games', 'board_games,'],\n",
       " ['pet_supplies', 'cats', 'feeding_watering_supplies,'],\n",
       " ['beauty', 'hair_care', 'shampoos,'],\n",
       " ['baby_products', 'strollers', 'joggers,'],\n",
       " ['baby_products', 'safety', 'kitchen_safety,'],\n",
       " ['health_personal_care', 'health_care', 'pill_cases_splitters,'],\n",
       " ['toys_games', 'games', 'card_games,'],\n",
       " ['grocery_gourmet_food', 'candy_chocolate', 'chocolate_bars,'],\n",
       " ['pet_supplies', 'dogs', 'collars,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'sports_supplements,'],\n",
       " ['health_personal_care', 'health_care', 'first_aid,'],\n",
       " ['beauty', 'skin_care', 'face,'],\n",
       " ['toys_games', 'action_toy_figures', 'figures,'],\n",
       " ['pet_supplies', 'dogs', 'food,'],\n",
       " ['health_personal_care', 'health_care', 'sleep_snoring,'],\n",
       " ['health_personal_care', 'health_care', 'cough_cold,'],\n",
       " ['health_personal_care',\n",
       "  'medical_supplies_equipment',\n",
       "  'mobility_aids_equipment,'],\n",
       " ['toys_games', 'sports_outdoor_play', 'pools_water_fun,'],\n",
       " ['grocery_gourmet_food', 'produce', 'fresh_fruits,'],\n",
       " ['baby_products', 'feeding', 'bottle_feeding,'],\n",
       " ['beauty', 'hair_care', 'hair_color,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['baby_products', 'feeding', 'highchairs_booster_seats,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['toys_games', 'dress_up_pretend_play', 'pretend_play,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['toys_games', 'games', 'trading_card_games,'],\n",
       " ['pet_supplies', 'dogs', 'beds_furniture,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['beauty', 'hair_care', 'hair_scalp_treatments,'],\n",
       " ['health_personal_care', 'health_care', 'cough_cold,'],\n",
       " ['toys_games', 'puzzles', 'brain_teasers,'],\n",
       " ['health_personal_care', 'nutrition_wellness,'],\n",
       " ['toys_games', 'baby_toddler_toys', 'push_pull_toys,'],\n",
       " ['health_personal_care', 'sexual_wellness', 'adult_toys_games,'],\n",
       " ['health_personal_care', 'personal_care', 'lip_care_products,'],\n",
       " ['toys_games', 'action_toy_figures', 'playsets,'],\n",
       " ['baby_products', 'feeding', 'solid_feeding,'],\n",
       " ['health_personal_care', 'personal_care', 'oral_hygiene,'],\n",
       " ['grocery_gourmet_food', 'snack_food', 'chips_crisps,'],\n",
       " ['baby_products', 'safety', 'cabinet_locks_straps,'],\n",
       " ['beauty', 'hair_care', 'hair_relaxers,'],\n",
       " ['pet_supplies', 'dogs', 'health_supplies,'],\n",
       " ['pet_supplies', 'cats', 'grooming,'],\n",
       " ['pet_supplies', 'dogs', 'collars,'],\n",
       " ['pet_supplies', 'cats', 'health_supplies,'],\n",
       " ['health_personal_care', 'health_care', 'women_s_health,'],\n",
       " ['pet_supplies', 'dogs', 'treats,'],\n",
       " ['pet_supplies', 'dogs', 'collars,'],\n",
       " ['beauty', 'skin_care', 'face,'],\n",
       " ['toys_games', 'tricycles', 'scooters_wagons,'],\n",
       " ['baby_products', 'diapering,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['toys_games', 'action_toy_figures', 'accessories,'],\n",
       " ['toys_games', 'games', 'card_games,'],\n",
       " ['pet_supplies', 'cats', 'grooming,'],\n",
       " ['pet_supplies', 'dogs', 'health_supplies,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['health_personal_care', 'health_care', 'pain_relievers,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['beauty', 'hair_care', 'shampoos,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'juices,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'coffee,'],\n",
       " ['grocery_gourmet_food', 'candy_chocolate', 'gummy_candies,'],\n",
       " ['toys_games', 'sports_outdoor_play', 'ball_pits_accessories,'],\n",
       " ['grocery_gourmet_food', 'candy_chocolate', 'gum,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['health_personal_care', 'household_supplies', 'household_batteries,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'braces,'],\n",
       " ['toys_games', 'baby_toddler_toys', 'car_seat_stroller_toys,'],\n",
       " ['baby_products', 'strollers', 'joggers,'],\n",
       " ['health_personal_care', 'health_care', 'allergy,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples,'],\n",
       " ['toys_games', 'games', 'board_games,'],\n",
       " ['pet_supplies', 'dogs', 'training_behavior_aids,'],\n",
       " ['baby_products', 'strollers', 'tandem,'],\n",
       " ['toys_games', 'arts_crafts', 'easels,'],\n",
       " ['beauty', 'hair_care', 'styling_products,'],\n",
       " ['beauty', 'tools_accessories', 'makeup_brushes_tools,'],\n",
       " ['toys_games', 'puzzles', 'jigsaw_puzzles,'],\n",
       " ['toys_games', 'dolls_accessories', 'dolls,'],\n",
       " ['health_personal_care', 'health_care', 'therapeutic_skin_care,'],\n",
       " ['toys_games', 'dress_up_pretend_play', 'pretend_play,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['toys_games', 'dress_up_pretend_play', 'pretend_play,'],\n",
       " ['baby_products', 'bathing_skin_care', 'bathing_tubs_seats,'],\n",
       " ['baby_products', 'diapering', 'diaper_pails_refills,'],\n",
       " ['toys_games', 'electronics_for_kids,'],\n",
       " ['pet_supplies', 'dogs', 'health_supplies,'],\n",
       " ['baby_products', 'bathing_skin_care', 'soaps_cleansers,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['grocery_gourmet_food', 'breakfast_foods', 'breakfast_cereal_bars,'],\n",
       " ['pet_supplies', 'dogs', 'beds_furniture,'],\n",
       " ['baby_products', 'diapering', 'diaper_pails_refills,'],\n",
       " ['grocery_gourmet_food', 'snack_food,'],\n",
       " ['toys_games', 'action_toy_figures', 'accessories,'],\n",
       " ['beauty', 'tools_accessories', 'mirrors,'],\n",
       " ['pet_supplies', 'dogs', 'grooming,'],\n",
       " ['toys_games', 'dress_up_pretend_play', 'pretend_play,'],\n",
       " ['baby_products', 'feeding', 'bottle_feeding,'],\n",
       " ['beauty', 'skin_care', 'eyes,'],\n",
       " ['beauty', 'fragrance', 'women_s,'],\n",
       " ['toys_games', 'electronics_for_kids,'],\n",
       " ['toys_games', 'action_toy_figures', 'playsets,'],\n",
       " ['health_personal_care', 'sexual_wellness', 'adult_toys_games,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['toys_games', 'baby_toddler_toys', 'push_pull_toys,'],\n",
       " ['toys_games', 'arts_crafts', 'easels,'],\n",
       " ['pet_supplies', 'cats', 'grooming,'],\n",
       " ['beauty', 'skin_care,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['toys_games', 'games,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['toys_games', 'action_toy_figures', 'figures,'],\n",
       " ['toys_games', 'baby_toddler_toys', 'music_sound,'],\n",
       " ['toys_games', 'baby_toddler_toys', 'music_sound,'],\n",
       " ['health_personal_care', 'health_care,'],\n",
       " ['health_personal_care', 'health_care', 'pain_relievers,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['baby_products', 'strollers', 'accessories,'],\n",
       " ['toys_games', 'tricycles', 'scooters_wagons,'],\n",
       " ['baby_products', 'diapering', 'diaper_pails_refills,'],\n",
       " ['health_personal_care', 'personal_care', 'oral_hygiene,'],\n",
       " ['health_personal_care', 'health_care', 'first_aid,'],\n",
       " ['beauty', 'skin_care', 'face,'],\n",
       " ['beauty', 'hair_care', 'shampoos,'],\n",
       " ['health_personal_care', 'household_supplies', 'household_batteries,'],\n",
       " ['toys_games', 'electronics_for_kids', 'electronic_toys,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'health_monitors,'],\n",
       " ['baby_products', 'diapering', 'diaper_changing_kits,'],\n",
       " ['health_personal_care', 'health_care', 'pill_cases_splitters,'],\n",
       " ['beauty', 'fragrance', 'women_s,'],\n",
       " ['grocery_gourmet_food', 'candy_chocolate', 'chocolate_bars,'],\n",
       " ['toys_games', 'baby_toddler_toys', 'car_seat_stroller_toys,'],\n",
       " ['toys_games', 'sports_outdoor_play', 'gym_sets_swings,'],\n",
       " ['toys_games', 'games', 'board_games,'],\n",
       " ['beauty', 'hair_care', 'hair_loss_products,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['beauty', 'skin_care', 'eyes,'],\n",
       " ['pet_supplies', 'cats', 'health_supplies,'],\n",
       " ['toys_games', 'novelty_gag_toys,'],\n",
       " ['baby_products', 'gear', 'backpacks_carriers,'],\n",
       " ['baby_products', 'gear', 'swings,'],\n",
       " ['baby_products', 'safety', 'edge_corner_guards,'],\n",
       " ['pet_supplies', 'dogs', 'feeding_watering_supplies,'],\n",
       " ['toys_games', 'dolls_accessories', 'dolls,'],\n",
       " ['toys_games', 'action_toy_figures', 'playsets,'],\n",
       " ['pet_supplies', 'cats', 'toys,'],\n",
       " ['health_personal_care', 'health_care', 'foot_care,'],\n",
       " ['toys_games', 'sports_outdoor_play', 'gardening_tools,'],\n",
       " ['pet_supplies', 'cats', 'food,'],\n",
       " ['grocery_gourmet_food', 'snack_food', 'crackers,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'braces,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['beauty', 'hair_care', 'styling_products,'],\n",
       " ['baby_products', 'nursery', 'furniture,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples', 'packaged_meals_side_dishes,'],\n",
       " ['toys_games', 'stuffed_animals_plush', 'puppets,'],\n",
       " ['toys_games', 'electronics_for_kids', 'electronic_toys,'],\n",
       " ['health_personal_care', 'personal_care', 'oral_hygiene,'],\n",
       " ['toys_games', 'dolls_accessories', 'dolls,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples,'],\n",
       " ['health_personal_care', 'health_care', 'pain_relievers,'],\n",
       " ['toys_games', 'puzzles', 'floor_puzzles,'],\n",
       " ['health_personal_care', 'household_supplies', 'cleaning_tools,'],\n",
       " ['pet_supplies', 'dogs', 'health_supplies,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples,'],\n",
       " ['baby_products', 'strollers', 'accessories,'],\n",
       " ['toys_games', 'tricycles', 'scooters_wagons,'],\n",
       " ['toys_games', 'games,'],\n",
       " ['baby_products', 'strollers', 'joggers,'],\n",
       " ['health_personal_care', 'health_care', 'pain_relievers,'],\n",
       " ['toys_games', 'learning_education', 'mathematics_counting,'],\n",
       " ['pet_supplies', 'dogs', 'collars,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['pet_supplies', 'cats', 'food,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['baby_products', 'feeding', 'bottle_feeding,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples', 'sauces,'],\n",
       " ['toys_games', 'sports_outdoor_play', 'pools_water_fun,'],\n",
       " ['grocery_gourmet_food', 'snack_food', 'popcorn,'],\n",
       " ['toys_games', 'arts_crafts', 'drawing_painting_supplies,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['toys_games', 'stuffed_animals_plush', 'puppets,'],\n",
       " ['toys_games', 'electronics_for_kids', 'electronic_toys,'],\n",
       " ['toys_games', 'building_toys', 'building_sets,'],\n",
       " ['beauty', 'fragrance', 'women_s,'],\n",
       " ['toys_games', 'vehicles_remote_control', 'play_vehicles,'],\n",
       " ['baby_products', 'feeding', 'bottle_feeding,'],\n",
       " ['toys_games', 'puzzles', 'jigsaw_puzzles,'],\n",
       " ['baby_products', 'nursery', 'bedding,'],\n",
       " ['beauty', 'skin_care', 'body,'],\n",
       " ['pet_supplies', 'fish_aquatic_pets', 'aquarium_heaters,'],\n",
       " ['beauty', 'fragrance', 'women_s,'],\n",
       " ['toys_games', 'electronics_for_kids', 'personal_video_players_accessories,'],\n",
       " ['pet_supplies', 'fish_aquatic_pets', 'aquariums,'],\n",
       " ['beauty', 'makeup', 'eyes,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'braces,'],\n",
       " ['pet_supplies', 'fish_aquatic_pets', 'aquariums,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['toys_games', 'sports_outdoor_play', 'sand_water_tables,'],\n",
       " ['beauty', 'skin_care', 'body,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['health_personal_care', 'household_supplies', 'household_cleaning,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['beauty', 'hair_care', 'shampoo_conditioner_sets,'],\n",
       " ['health_personal_care', 'health_care', 'first_aid,'],\n",
       " ['beauty', 'skin_care', 'body,'],\n",
       " ['toys_games', 'puzzles,'],\n",
       " ['pet_supplies', 'cats', 'beds_furniture,'],\n",
       " ['pet_supplies', 'cats', 'litter_housebreaking,'],\n",
       " ['grocery_gourmet_food', 'breakfast_foods', 'cereals,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['health_personal_care', 'health_care', 'alternative_medicine,'],\n",
       " ['pet_supplies', 'dogs', 'collars,'],\n",
       " ['toys_games', 'games,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'health_monitors,'],\n",
       " ['health_personal_care', 'health_care', 'incontinence,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'tea,'],\n",
       " ['beauty', 'skin_care', 'face,'],\n",
       " ['pet_supplies', 'cats', 'health_supplies,'],\n",
       " ['toys_games', 'electronics_for_kids', 'music_players_karaoke,'],\n",
       " ['baby_products', 'gear', 'swings,'],\n",
       " ['health_personal_care', 'household_supplies', 'cleaning_tools,'],\n",
       " ['toys_games', 'electronics_for_kids', 'electronic_toys,'],\n",
       " ['pet_supplies', 'dogs', 'toys,'],\n",
       " ['pet_supplies', 'bunny_rabbit_central', 'houses_habitats,'],\n",
       " ['pet_supplies', 'dogs', 'houses,'],\n",
       " ['toys_games', 'dolls_accessories', 'dolls,'],\n",
       " ['beauty', 'fragrance', 'women_s,'],\n",
       " ['beauty', 'fragrance,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'braces,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples', 'sauces,'],\n",
       " ['toys_games', 'novelty_gag_toys', 'magic_kits_accessories,'],\n",
       " ['beauty', 'fragrance', 'men_s,'],\n",
       " ['baby_products', 'feeding,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples', 'canned_jarred_food,'],\n",
       " ['beauty', 'hair_care', 'hair_color,'],\n",
       " ['health_personal_care', 'personal_care', 'lip_care_products,'],\n",
       " ['toys_games', 'sports_outdoor_play', 'sports,'],\n",
       " ['health_personal_care', 'health_care', 'alternative_medicine,'],\n",
       " ['toys_games', 'baby_toddler_toys', 'stacking_nesting_toys,'],\n",
       " ['pet_supplies', 'birds', 'cages_accessories,'],\n",
       " ['pet_supplies', 'cats', 'beds_furniture,'],\n",
       " ['beauty', 'skin_care', 'face,'],\n",
       " ['pet_supplies', 'dogs', 'apparel_accessories,'],\n",
       " ['toys_games', 'hobbies,'],\n",
       " ['grocery_gourmet_food', 'snack_food', 'chips_crisps,'],\n",
       " ['toys_games', 'learning_education', 'science,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'coffee,'],\n",
       " ['toys_games', 'baby_toddler_toys,'],\n",
       " ['baby_products', 'diapering', 'wipes_holders,'],\n",
       " ['toys_games', 'stuffed_animals_plush', 'animals_figures,'],\n",
       " ['pet_supplies', 'dogs', 'toys,'],\n",
       " ['toys_games', 'dolls_accessories', 'dollhouses,'],\n",
       " ['toys_games', 'action_toy_figures', 'playsets,'],\n",
       " ['health_personal_care', 'personal_care', 'oral_hygiene,'],\n",
       " ['pet_supplies', 'dogs', 'training_behavior_aids,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'tea,'],\n",
       " ['baby_products', 'diapering', 'disposable_diapers,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['beauty', 'fragrance', 'women_s,'],\n",
       " ['health_personal_care', 'household_supplies', 'paper_plastic,'],\n",
       " ['health_personal_care', 'health_care', 'family_planning_contraceptives,'],\n",
       " ['pet_supplies', 'cats', 'educational_repellents,'],\n",
       " ['beauty', 'fragrance', 'women_s,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'braces,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['grocery_gourmet_food', 'snack_food', 'pretzels,'],\n",
       " ['pet_supplies', 'fish_aquatic_pets', 'water_treatments,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['beauty', 'bath_body', 'bath,'],\n",
       " ['pet_supplies', 'dogs', 'beds_furniture,'],\n",
       " ['baby_products', 'feeding', 'bottle_feeding,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['beauty', 'makeup', 'nails,'],\n",
       " ['beauty', 'bath_body', 'cleansers,'],\n",
       " ['beauty', 'tools_accessories', 'mirrors,'],\n",
       " ['baby_products', 'gear', 'swings,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples,'],\n",
       " ['beauty', 'fragrance', 'women_s,'],\n",
       " ['baby_products', 'feeding', 'pillows_stools,'],\n",
       " ['pet_supplies', 'birds', 'cages_accessories,'],\n",
       " ['pet_supplies', 'fish_aquatic_pets', 'aquariums,'],\n",
       " ['health_personal_care', 'personal_care', 'lip_care_products,'],\n",
       " ['toys_games', 'novelty_gag_toys,'],\n",
       " ['pet_supplies', 'cats', 'feeding_watering_supplies,'],\n",
       " ['beauty', 'skin_care', 'body,'],\n",
       " ['toys_games', 'games', 'card_games,'],\n",
       " ['grocery_gourmet_food', 'candy_chocolate', 'gummy_candy,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['baby_products', 'feeding', 'bottle_feeding,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples', 'pasta_noodles,'],\n",
       " ['pet_supplies', 'fish_aquatic_pets', 'pumps_filters,'],\n",
       " ['baby_products', 'strollers', 'tandem,'],\n",
       " ['toys_games', 'hobbies', 'model_building_kits_tools,'],\n",
       " ['pet_supplies', 'dogs', 'training_behavior_aids,'],\n",
       " ['baby_products', 'bathing_skin_care', 'skin_care,'],\n",
       " ['beauty', 'hair_care', 'shampoo_plus_conditioner,'],\n",
       " ['toys_games', 'vehicles_remote_control', 'play_vehicles,'],\n",
       " ['baby_products', 'nursery', 'bedding,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['pet_supplies', 'cats', 'feeding_watering_supplies,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'braces,'],\n",
       " ['baby_products', 'potty_training', 'potties_seats,'],\n",
       " ['grocery_gourmet_food', 'candy_chocolate', 'suckers_lollipops,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['beauty', 'bath_body', 'cleansers,'],\n",
       " ['beauty', 'makeup', 'face,'],\n",
       " ['health_personal_care', 'sexual_wellness', 'adult_toys_games,'],\n",
       " ['pet_supplies', 'dogs', 'litter_housebreaking,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['beauty', 'bath_body,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['grocery_gourmet_food', 'gourmet_gifts', 'snack_gifts,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['pet_supplies', 'dogs', 'apparel_accessories,'],\n",
       " ['baby_products', 'safety', 'monitors,'],\n",
       " ['pet_supplies', 'cats', 'grooming,'],\n",
       " ['health_personal_care', 'health_care', 'thermometers,'],\n",
       " ['beauty', 'skin_care', 'face,'],\n",
       " ['baby_products', 'diapering', 'disposable_diapers,'],\n",
       " ['beauty', 'skin_care,'],\n",
       " ['toys_games', 'action_toy_figures', 'playsets,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'tea,'],\n",
       " ['beauty', 'bath_body', 'bath,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples', 'sauces,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'braces,'],\n",
       " ['baby_products', 'car_seats_accessories', 'car_seats,'],\n",
       " ['beauty', 'hair_care', 'hair_color,'],\n",
       " ['health_personal_care',\n",
       "  'medical_supplies_equipment',\n",
       "  'bathroom_aids_safety,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['grocery_gourmet_food', 'candy_chocolate', 'fudge,'],\n",
       " ['baby_products', 'gear', 'activity_centers_entertainers,'],\n",
       " ['grocery_gourmet_food', 'breakfast_foods', 'cereals,'],\n",
       " ['baby_products', 'safety', 'monitors,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'tea,'],\n",
       " ['health_personal_care', 'health_care', 'massage_relaxation,'],\n",
       " ['pet_supplies', 'dogs', 'toys,'],\n",
       " ['toys_games', 'tricycles', 'scooters_wagons,'],\n",
       " ['health_personal_care', 'personal_care', 'oral_hygiene,'],\n",
       " ['beauty', 'hair_care', 'conditioners,'],\n",
       " ['toys_games', 'dolls_accessories', 'playsets,'],\n",
       " ['baby_products', 'diapering', 'wipes_holders,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['pet_supplies', 'dogs', 'training_behavior_aids,'],\n",
       " ['toys_games', 'electronics_for_kids', 'electronic_toys,'],\n",
       " ['pet_supplies', 'dogs', 'toys,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['baby_products', 'feeding', 'highchairs_booster_seats,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['pet_supplies', 'dogs', 'grooming,'],\n",
       " ['toys_games', 'games', 'game_room_games,'],\n",
       " ['baby_products', 'feeding', 'highchairs_booster_seats,'],\n",
       " ['toys_games', 'stuffed_animals_plush', 'animals_figures,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['toys_games', 'stuffed_animals_plush', 'animals_figures,'],\n",
       " ['toys_games', 'action_toy_figures', 'statues,'],\n",
       " ['health_personal_care', 'personal_care', 'lip_care_products,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples,'],\n",
       " ['beauty', 'skin_care', 'hands_nails,'],\n",
       " ['baby_products', 'diapering', 'disposable_diapers,'],\n",
       " ['health_personal_care', 'household_supplies', 'household_batteries,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'sports_supplements,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'health_monitors,'],\n",
       " ['baby_products', 'diapering', 'diaper_pails_refills,'],\n",
       " ['pet_supplies', 'fish_aquatic_pets', 'aquariums,'],\n",
       " ['health_personal_care', 'health_care', 'cough_cold,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['beauty', 'fragrance', 'women_s,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples,'],\n",
       " ['pet_supplies', 'cats', 'feeding_watering_supplies,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['toys_games', 'tricycles', 'scooters_wagons,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'tea,'],\n",
       " ['beauty', 'bath_body', 'bathing_accessories,'],\n",
       " ['toys_games', 'electronics_for_kids,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'coffee,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['toys_games', 'games', 'trading_card_games,'],\n",
       " ['baby_products', 'safety', 'harnesses_leashes,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'daily_living_aids,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['baby_products', 'safety', 'monitors,'],\n",
       " ['toys_games', 'action_toy_figures', 'figures,'],\n",
       " ['health_personal_care', 'personal_care', 'oral_hygiene,'],\n",
       " ['baby_products', 'safety', 'gates_doorways,'],\n",
       " ['toys_games', 'dress_up_pretend_play', 'pretend_play,'],\n",
       " ['baby_products', 'feeding', 'pacifiers_accessories,'],\n",
       " ['beauty', 'fragrance', 'men_s,'],\n",
       " ['beauty', 'makeup', 'nails,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['baby_products', 'feeding', 'bottle_feeding,'],\n",
       " ['beauty', 'fragrance', 'women_s,'],\n",
       " ['grocery_gourmet_food', 'candy_chocolate', 'hard_candies,'],\n",
       " ['beauty', 'fragrance', 'women_s,'],\n",
       " ['beauty', 'hair_care', 'styling_products,'],\n",
       " ['toys_games', 'baby_toddler_toys', 'music_sound,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'health_monitors,'],\n",
       " ['beauty', 'makeup', 'eyes,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['health_personal_care', 'health_care', 'foot_care,'],\n",
       " ['toys_games', 'arts_crafts', 'craft_kits,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['beauty', 'hair_care', 'hair_color,'],\n",
       " ['toys_games', 'baby_toddler_toys,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['toys_games', 'baby_toddler_toys', 'bath_toys,'],\n",
       " ['beauty', 'makeup', 'face,'],\n",
       " ['health_personal_care', 'baby_child_care,'],\n",
       " ['toys_games', 'sports_outdoor_play', 'gym_sets_swings,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples,'],\n",
       " ['toys_games', 'puzzles', 'brain_teasers,'],\n",
       " ['pet_supplies', 'cats', 'beds_furniture,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'tea,'],\n",
       " ['beauty', 'fragrance', 'men_s,'],\n",
       " ['toys_games', 'tricycles', 'scooters_wagons,'],\n",
       " ['baby_products', 'feeding', 'breastfeeding,'],\n",
       " ['toys_games', 'learning_education', 'basic_life_skills_toys,'],\n",
       " ['health_personal_care', 'health_care', 'digestion_nausea,'],\n",
       " ['beauty', 'hair_care', 'hair_color,'],\n",
       " ['pet_supplies', 'cats', 'beds_furniture,'],\n",
       " ['beauty', 'skin_care,'],\n",
       " ['beauty', 'fragrance', 'women_s,'],\n",
       " ['baby_products', 'nursery', 'bedding,'],\n",
       " ['health_personal_care', 'health_care', 'pill_cases_splitters,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'tea,'],\n",
       " ['toys_games', 'action_toy_figures', 'figures,'],\n",
       " ['health_personal_care', 'health_care', 'pain_relievers,'],\n",
       " ['health_personal_care', 'personal_care', 'eye_care,'],\n",
       " ['health_personal_care', 'personal_care', 'feminine_care,'],\n",
       " ['toys_games', 'puzzles', 'jigsaw_puzzles,'],\n",
       " ['grocery_gourmet_food', 'candy_chocolate', 'chewing_gum,'],\n",
       " ['toys_games', 'electronics_for_kids', 'electronic_toys,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['baby_products', 'strollers', 'standard,'],\n",
       " ['toys_games', 'tricycles', 'scooters_wagons,'],\n",
       " ['grocery_gourmet_food', 'snack_food', 'popcorn,'],\n",
       " ['health_personal_care', 'personal_care', 'ear_care,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'sports_supplements,'],\n",
       " ['health_personal_care', 'personal_care', 'lip_care_products,'],\n",
       " ['toys_games', 'puzzles', 'pegged_puzzles,'],\n",
       " ['baby_products', 'feeding', 'bottle_feeding,'],\n",
       " ['grocery_gourmet_food', 'snack_food', 'cookies,'],\n",
       " ['health_personal_care', 'health_care', 'massage_relaxation,'],\n",
       " ['health_personal_care', 'household_supplies', 'lighters,'],\n",
       " ['health_personal_care', 'personal_care', 'oral_hygiene,'],\n",
       " ['pet_supplies', 'dogs', 'collars,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['toys_games', 'baby_toddler_toys,'],\n",
       " ['toys_games', 'dress_up_pretend_play', 'pretend_play,'],\n",
       " ['baby_products', 'gifts', 'toy_banks,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['pet_supplies', 'dogs', 'collars,'],\n",
       " ['baby_products', 'nursery', 'furniture,'],\n",
       " ['beauty', 'bath_body', 'scrubs_body_treatments,'],\n",
       " ['toys_games', 'games,'],\n",
       " ['health_personal_care', 'household_supplies', 'dishwashing,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'tea,'],\n",
       " ['pet_supplies', 'dogs', 'beds_furniture,'],\n",
       " ['pet_supplies', 'cats', 'carriers_strollers,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['baby_products', 'safety', 'gates_doorways,'],\n",
       " ['toys_games', 'stuffed_animals_plush', 'animals_figures,'],\n",
       " ['baby_products', 'car_seats_accessories', 'car_seats,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'braces,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'tea,'],\n",
       " ['baby_products', 'safety', 'kitchen_safety,'],\n",
       " ['baby_products', 'nursery', 'bedding,'],\n",
       " ['toys_games', 'sports_outdoor_play', 'pogo_sticks_hoppers,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['toys_games', 'vehicles_remote_control', 'play_vehicles,'],\n",
       " ['health_personal_care', 'household_supplies', 'dishwashing,'],\n",
       " ['toys_games', 'games', 'card_games,'],\n",
       " ['pet_supplies', 'cats', 'health_supplies,'],\n",
       " ['beauty', 'bath_body', 'sets,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['beauty', 'makeup', 'face,'],\n",
       " ['baby_products', 'gear', 'backpacks_carriers,'],\n",
       " ['health_personal_care', 'household_supplies', 'cleaning_tools,'],\n",
       " ['baby_products', 'nursery,'],\n",
       " ['toys_games', 'electronics_for_kids', 'plug_play_video_games,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'tea,'],\n",
       " ['toys_games', 'games', 'board_games,'],\n",
       " ['toys_games', 'arts_crafts', 'drawing_painting_supplies,'],\n",
       " ['baby_products', 'diapering', 'disposable_diapers,'],\n",
       " ['health_personal_care', 'personal_care', 'lip_care_products,'],\n",
       " ['toys_games', 'dress_up_pretend_play', 'pretend_play,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['beauty', 'makeup', 'face,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['health_personal_care', 'household_supplies', 'cleaning_tools,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['health_personal_care', 'health_care,'],\n",
       " ['beauty', 'skin_care', 'face,'],\n",
       " ['health_personal_care', 'health_care', 'massage_relaxation,'],\n",
       " ['pet_supplies', 'dogs', 'treats,'],\n",
       " ['health_personal_care', 'health_care', 'incontinence,'],\n",
       " ['toys_games', 'vehicles_remote_control', 'play_vehicles,'],\n",
       " ['beauty', 'skin_care', 'face,'],\n",
       " ['toys_games', 'dolls_accessories', 'playsets,'],\n",
       " ['baby_products', 'gear', 'baby_gyms_playmats,'],\n",
       " ['beauty', 'fragrance', 'women_s,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['baby_products', 'diapering', 'diaper_pails_refills,'],\n",
       " ['beauty', 'hair_care', 'hair_color,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'braces,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'braces,'],\n",
       " ['toys_games', 'action_toy_figures', 'playsets,'],\n",
       " ['beauty', 'skin_care', 'hands_nails,'],\n",
       " ['pet_supplies', 'dogs', 'houses,'],\n",
       " ['health_personal_care', 'health_care', 'first_aid,'],\n",
       " ['beauty', 'fragrance', 'men_s,'],\n",
       " ['toys_games', 'grown_up_toys,'],\n",
       " ['health_personal_care', 'health_care', 'pill_cases_splitters,'],\n",
       " ['toys_games', 'electronics_for_kids,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'tea,'],\n",
       " ['health_personal_care', 'health_care', 'cough_cold,'],\n",
       " ['health_personal_care', 'health_care', 'foot_care,'],\n",
       " ['toys_games', 'action_toy_figures', 'figures,'],\n",
       " ['toys_games', 'hobbies', 'model_building_kits_tools,'],\n",
       " ['health_personal_care', 'baby_child_care,'],\n",
       " ['toys_games', 'dolls_accessories', 'dolls,'],\n",
       " ['health_personal_care', 'health_care', 'pain_relievers,'],\n",
       " ['health_personal_care', 'health_care', 'first_aid,'],\n",
       " ['health_personal_care', 'health_care', 'first_aid,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['baby_products', 'safety', 'gates_doorways,'],\n",
       " ['baby_products', 'feeding', 'solid_feeding,'],\n",
       " ['baby_products', 'gear', 'swings,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'hot_cocoa,'],\n",
       " ['health_personal_care', 'personal_care', 'oral_hygiene,'],\n",
       " ['toys_games', 'dolls_accessories', 'dollhouses,'],\n",
       " ['health_personal_care',\n",
       "  'medical_supplies_equipment',\n",
       "  'bathroom_aids_safety,'],\n",
       " ['toys_games', 'action_toy_figures', 'figures,'],\n",
       " ['grocery_gourmet_food', 'sauces_dips', 'sauces,'],\n",
       " ['health_personal_care', 'baby_child_care,'],\n",
       " ['health_personal_care', 'personal_care', 'oral_hygiene,'],\n",
       " ['toys_games', 'tricycles', 'scooters_wagons,'],\n",
       " ['toys_games', 'games', 'board_games,'],\n",
       " ['baby_products', 'gear', 'baby_gyms_playmats,'],\n",
       " ['health_personal_care', 'health_care', 'first_aid,'],\n",
       " ['toys_games', 'building_toys', 'building_sets,'],\n",
       " ['beauty', 'makeup', 'nails,'],\n",
       " ['toys_games', 'baby_toddler_toys,'],\n",
       " ['toys_games', 'building_toys', 'building_sets,'],\n",
       " ['toys_games', 'puzzles', 'floor_puzzles,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['pet_supplies', 'cats', 'toys,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'tea,'],\n",
       " ['pet_supplies', 'cats', 'grooming,'],\n",
       " ['baby_products', 'gear', 'swings,'],\n",
       " ['pet_supplies', 'dogs', 'health_supplies,'],\n",
       " ['pet_supplies', 'cats', 'feeding_watering_supplies,'],\n",
       " ['toys_games', 'action_toy_figures', 'playsets,'],\n",
       " ['beauty', 'skin_care', 'eyes,'],\n",
       " ['pet_supplies', 'dogs', 'apparel_accessories,'],\n",
       " ['pet_supplies', 'dogs', 'treats,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['pet_supplies', 'dogs', 'collars,'],\n",
       " ['beauty', 'makeup', 'nails,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['pet_supplies', 'dogs', 'health_supplies,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['toys_games', 'games', 'tile_games,'],\n",
       " ['health_personal_care', 'health_care', 'alternative_medicine,'],\n",
       " ['beauty', 'bath_body', 'cleansers,'],\n",
       " ['toys_games', 'games', 'board_games,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'daily_living_aids,'],\n",
       " ['health_personal_care', 'household_supplies', 'household_cleaning,'],\n",
       " ['pet_supplies', 'dogs', 'toys,'],\n",
       " ['beauty', 'makeup', 'eyes,'],\n",
       " ['baby_products', 'gear', 'swings,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['toys_games', 'sports_outdoor_play', 'pools_water_fun,'],\n",
       " ['pet_supplies', 'cats', 'beds_furniture,'],\n",
       " ['pet_supplies', 'cats', 'feeding_watering_supplies,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['beauty', 'skin_care,'],\n",
       " ['baby_products', 'pregnancy_maternity', 'maternity_pillows,'],\n",
       " ['toys_games', 'action_toy_figures', 'figures,'],\n",
       " ['pet_supplies', 'cats', 'feeding_watering_supplies,'],\n",
       " ['pet_supplies', 'cats', 'feeding_watering_supplies,'],\n",
       " ['health_personal_care', 'household_supplies', 'paper_plastic,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['toys_games', 'baby_toddler_toys', 'activity_play_centers,'],\n",
       " ['toys_games', 'baby_toddler_toys', 'activity_play_centers,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'coffee,'],\n",
       " ['grocery_gourmet_food', 'candy_chocolate', 'licorice,'],\n",
       " ['pet_supplies', 'dogs', 'health_supplies,'],\n",
       " ['grocery_gourmet_food', 'candy_chocolate', 'taffy,'],\n",
       " ['pet_supplies', 'dogs', 'grooming,'],\n",
       " ['toys_games', 'novelty_gag_toys,'],\n",
       " ['beauty', 'hair_care', 'conditioners,'],\n",
       " ['toys_games', 'action_toy_figures', 'figures,'],\n",
       " ['toys_games', 'learning_education', 'geography,'],\n",
       " ['pet_supplies', 'dogs', 'collars,'],\n",
       " ['toys_games', 'baby_toddler_toys', 'rattles,'],\n",
       " ['pet_supplies', 'dogs', 'toys,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'daily_living_aids,'],\n",
       " ['toys_games', 'games', 'board_games,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'daily_living_aids,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['toys_games', 'dress_up_pretend_play', 'pretend_play,'],\n",
       " ['toys_games', 'electronics_for_kids', 'electronic_toys,'],\n",
       " ['pet_supplies', 'cats', 'health_supplies,'],\n",
       " ['pet_supplies', 'cats', 'food,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'braces,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['baby_products', 'feeding', 'highchairs_booster_seats,'],\n",
       " ['beauty', 'fragrance,'],\n",
       " ['beauty', 'skin_care', 'eyes,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['pet_supplies', 'dogs', 'collars,'],\n",
       " ['baby_products', 'diapering', 'disposable_diapers,'],\n",
       " ['toys_games', 'puzzles', 'pegged_puzzles,'],\n",
       " ['toys_games', 'dress_up_pretend_play', 'pretend_play,'],\n",
       " ['baby_products', 'potty_training', 'seat_covers,'],\n",
       " ['toys_games', 'sports_outdoor_play', 'gym_sets_swings,'],\n",
       " ['pet_supplies', 'cats', 'beds_furniture,'],\n",
       " ['baby_products', 'feeding', 'solid_feeding,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['baby_products', 'nursery', 'furniture,'],\n",
       " ['toys_games', 'games', 'board_games,'],\n",
       " ['pet_supplies', 'dogs', 'collars,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['health_personal_care', 'health_care', 'foot_care,'],\n",
       " ['pet_supplies', 'dogs', 'toys,'],\n",
       " ['pet_supplies', 'cats', 'toys,'],\n",
       " ['beauty', 'skin_care', 'hands_nails,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['beauty', 'skin_care', 'face,'],\n",
       " ['grocery_gourmet_food', 'produce', 'fresh_fruits,'],\n",
       " ['baby_products', 'nursery', 'nursery_d_cor,'],\n",
       " ['baby_products', 'bathing_skin_care', 'grooming_healthcare_kits,'],\n",
       " ['toys_games', 'baby_toddler_toys', 'push_pull_toys,'],\n",
       " ['grocery_gourmet_food', 'candy_chocolate', 'suckers_lollipops,'],\n",
       " ['toys_games', 'dolls_accessories', 'dolls,'],\n",
       " ['pet_supplies', 'cats', 'toys,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples,'],\n",
       " ['toys_games', 'dress_up_pretend_play', 'pretend_play,'],\n",
       " ['health_personal_care', 'household_supplies', 'cleaning_tools,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['toys_games', 'sports_outdoor_play', 'sandboxes_accessories,'],\n",
       " ['health_personal_care', 'health_care', 'therapeutic_skin_care,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'coconut_water,'],\n",
       " ['beauty', 'makeup', 'nails,'],\n",
       " ['pet_supplies', 'cats', 'feeding_watering_supplies,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['beauty', 'hair_care', 'hair_color,'],\n",
       " ['grocery_gourmet_food', 'breads_bakery', 'breads,'],\n",
       " ['beauty', 'makeup', 'face,'],\n",
       " ['pet_supplies', 'dogs', 'litter_housebreaking,'],\n",
       " ['toys_games', 'learning_education', 'geography,'],\n",
       " ['health_personal_care',\n",
       "  'medical_supplies_equipment',\n",
       "  'mobility_aids_equipment,'],\n",
       " ['pet_supplies', 'dogs', 'health_supplies,'],\n",
       " ['baby_products', 'potty_training', 'potties_seats,'],\n",
       " ['toys_games', 'games', 'board_games,'],\n",
       " ['toys_games', 'puzzles', 'floor_puzzles,'],\n",
       " ['beauty', 'skin_care', 'hands_nails,'],\n",
       " ['toys_games', 'sports_outdoor_play', 'gym_sets_swings,'],\n",
       " ['toys_games', 'puzzles', 'floor_puzzles,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['health_personal_care', 'household_supplies', 'household_cleaning,'],\n",
       " ['health_personal_care', 'health_care', 'pain_relievers,'],\n",
       " ['pet_supplies', 'dogs', 'beds_furniture,'],\n",
       " ['baby_products', 'feeding', 'breastfeeding,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['health_personal_care', 'household_supplies', 'cleaning_tools,'],\n",
       " ['pet_supplies', 'dogs', 'houses,'],\n",
       " ['health_personal_care', 'health_care', 'first_aid,'],\n",
       " ['health_personal_care', 'household_supplies', 'household_batteries,'],\n",
       " ['health_personal_care', 'health_care', 'foot_care,'],\n",
       " ['pet_supplies', 'dogs', 'houses,'],\n",
       " ['baby_products', 'bathing_skin_care', 'bathing_tubs_seats,'],\n",
       " ['health_personal_care', 'personal_care', 'oral_hygiene,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['toys_games', 'electronics_for_kids', 'electronic_toys,'],\n",
       " ['pet_supplies', 'dogs', 'doors,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['health_personal_care', 'household_supplies', 'household_cleaning,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples,'],\n",
       " ['toys_games', 'arts_crafts', 'drawing_painting_supplies,'],\n",
       " ['baby_products', 'feeding', 'highchairs_booster_seats,'],\n",
       " ['toys_games', 'dress_up_pretend_play', 'pretend_play,'],\n",
       " ['pet_supplies', 'cats', 'litter_housebreaking,'],\n",
       " ['toys_games', 'learning_education', 'reading_writing,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['baby_products', 'health_baby_care', 'teethers,'],\n",
       " ['health_personal_care', 'health_care', 'alternative_medicine,'],\n",
       " ['toys_games', 'building_toys', 'building_sets,'],\n",
       " ['pet_supplies', 'dogs', 'houses,'],\n",
       " ['grocery_gourmet_food', 'breakfast_foods', 'cereals,'],\n",
       " ['health_personal_care', 'nutrition_wellness,'],\n",
       " ['toys_games', 'arts_crafts,'],\n",
       " ['baby_products', 'diapering', 'disposable_diapers,'],\n",
       " ['pet_supplies', 'cats', 'litter_housebreaking,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['toys_games', 'building_toys', 'building_sets,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'tea,'],\n",
       " ['beauty', 'bath_body', 'cleansers,'],\n",
       " ['baby_products', 'gear', 'backpacks_carriers,'],\n",
       " ['beauty', 'skin_care', 'face,'],\n",
       " ['baby_products', 'gear', 'baby_gyms_playmats,'],\n",
       " ['pet_supplies', 'dogs', 'grooming,'],\n",
       " ['beauty', 'tools_accessories', 'nail_tools,'],\n",
       " ['toys_games', 'electronics_for_kids,'],\n",
       " ['pet_supplies', 'dogs', 'collars,'],\n",
       " ['beauty', 'skin_care', 'body,'],\n",
       " ['toys_games', 'sports_outdoor_play', 'blasters_foam_play,'],\n",
       " ['health_personal_care', 'health_care,'],\n",
       " ['beauty', 'tools_accessories', 'makeup_brushes_tools,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['toys_games', 'sports_outdoor_play', 'blasters_foam_play,'],\n",
       " ['baby_products', 'diapering', 'disposable_diapers,'],\n",
       " ['toys_games', 'baby_toddler_toys', 'music_sound,'],\n",
       " ['baby_products', 'gear', 'swings,'],\n",
       " ['pet_supplies', 'dogs', 'food,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples,'],\n",
       " ['toys_games', 'baby_toddler_toys,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['baby_products', 'diapering', 'disposable_diapers,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'coffee,'],\n",
       " ['toys_games', 'arts_crafts', 'drawing_painting_supplies,'],\n",
       " ['baby_products', 'pregnancy_maternity', 'maternity_pillows,'],\n",
       " ['beauty', 'fragrance', 'men_s,'],\n",
       " ['beauty', 'makeup', 'nails,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['beauty', 'skin_care', 'face,'],\n",
       " ['beauty', 'hair_care,'],\n",
       " ['beauty', 'fragrance', 'women_s,'],\n",
       " ['toys_games', 'games', 'board_games,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['toys_games', 'games', 'board_games,'],\n",
       " ['toys_games', 'novelty_gag_toys', 'gag_toys_practical_jokes,'],\n",
       " ['toys_games', 'hobbies', 'trains_accessories,'],\n",
       " ['beauty', 'tools_accessories', 'mirrors,'],\n",
       " ['toys_games', 'learning_education', 'science,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['baby_products', 'bathing_skin_care', 'bathing_tubs_seats,'],\n",
       " ['health_personal_care', 'health_care', 'allergy,'],\n",
       " ['beauty', 'tools_accessories', 'mirrors,'],\n",
       " ['baby_products', 'feeding', 'breastfeeding,'],\n",
       " ['toys_games', 'baby_toddler_toys', 'bath_toys,'],\n",
       " ['toys_games', 'learning_education', 'science,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['grocery_gourmet_food',\n",
       "  'fresh_flowers_live_indoor_plants',\n",
       "  'live_indoor_plants,'],\n",
       " ['health_personal_care', 'personal_care', 'oral_hygiene,'],\n",
       " ['health_personal_care', 'health_care', 'pain_relievers,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['health_personal_care', 'household_supplies', 'household_cleaning,'],\n",
       " ['toys_games', 'action_toy_figures', 'playsets,'],\n",
       " ['toys_games', 'building_toys', 'building_sets,'],\n",
       " ['toys_games', 'novelty_gag_toys', 'wind_up_toys,'],\n",
       " ['pet_supplies', 'fish_aquatic_pets', 'pumps_filters,'],\n",
       " ['toys_games', 'grown_up_toys,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['toys_games', 'hobbies', 'model_building_kits_tools,'],\n",
       " ['pet_supplies', 'cats', 'collars,'],\n",
       " ['toys_games', 'games', 'card_games,'],\n",
       " ['pet_supplies', 'dogs', 'carriers_travel_products,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'vitamins_supplements,'],\n",
       " ['toys_games', 'electronics_for_kids', 'electronic_toys,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'health_monitors,'],\n",
       " ['baby_products', 'nursery', 'bedding,'],\n",
       " ['health_personal_care', 'nutrition_wellness', 'nutrition_bars_drinks,'],\n",
       " ['health_personal_care', 'household_supplies', 'cleaning_tools,'],\n",
       " ['pet_supplies', 'cats', 'litter_housebreaking,'],\n",
       " ['health_personal_care', 'household_supplies', 'household_cleaning,'],\n",
       " ['health_personal_care', 'personal_care', 'oral_hygiene,'],\n",
       " ['beauty', 'makeup', 'nails,'],\n",
       " ['beauty', 'bath_body', 'cleansers,'],\n",
       " ['health_personal_care', 'personal_care', 'oral_hygiene,'],\n",
       " ['beauty', 'tools_accessories', 'mirrors,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['pet_supplies', 'dogs', 'houses,'],\n",
       " ['pet_supplies', 'dogs', 'toys,'],\n",
       " ['health_personal_care', 'personal_care', 'lip_care_products,'],\n",
       " ['baby_products', 'nursery', 'furniture,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['baby_products', 'feeding', 'breastfeeding,'],\n",
       " ['beauty', 'hair_care', 'shampoos,'],\n",
       " ['pet_supplies', 'birds', 'feeding_watering_supplies,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'juices,'],\n",
       " ['health_personal_care', 'medical_supplies_equipment', 'braces,'],\n",
       " ['baby_products', 'safety', 'gates_doorways,'],\n",
       " ['baby_products', 'strollers', 'accessories,'],\n",
       " ['health_personal_care', 'personal_care', 'eye_care,'],\n",
       " ['grocery_gourmet_food', 'pantry_staples,'],\n",
       " ['beauty', 'fragrance', 'women_s,'],\n",
       " ['pet_supplies', 'dogs', 'treats,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'tea,'],\n",
       " ['baby_products', 'safety', 'monitors,'],\n",
       " ['toys_games', 'baby_toddler_toys', 'car_seat_stroller_toys,'],\n",
       " ['health_personal_care',\n",
       "  'medical_supplies_equipment',\n",
       "  'mobility_aids_equipment,'],\n",
       " ['health_personal_care', 'personal_care', 'oral_hygiene,'],\n",
       " ['grocery_gourmet_food', 'beverages', 'coffee,'],\n",
       " ['toys_games', 'dolls_accessories', 'dolls,'],\n",
       " ['toys_games', 'novelty_gag_toys,'],\n",
       " ['baby_products', 'diapering', 'diaper_pails_refills,'],\n",
       " ['baby_products', 'car_seats_accessories', 'car_seats,'],\n",
       " ['health_personal_care', 'sexual_wellness', 'adult_toys_games,'],\n",
       " ['baby_products', 'diapering', 'diaper_pails_refills,'],\n",
       " ['beauty', 'hair_care', 'styling_tools,'],\n",
       " ['toys_games', 'electronics_for_kids', 'systems_accessories,'],\n",
       " ['toys_games', 'stuffed_animals_plush', 'animals_figures,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['pet_supplies', 'dogs', 'health_supplies,'],\n",
       " ['toys_games', 'sports_outdoor_play', 'sports,'],\n",
       " ['health_personal_care', 'personal_care', 'shaving_hair_removal,'],\n",
       " ['toys_games', 'stuffed_animals_plush', 'animals_figures,'],\n",
       " ['pet_supplies', 'dogs', 'collars,'],\n",
       " ['baby_products', 'diapering,'],\n",
       " ['health_personal_care',\n",
       "  'medical_supplies_equipment',\n",
       "  'mobility_aids_equipment,'],\n",
       " ['health_personal_care', 'personal_care', 'eye_care,'],\n",
       " ...]"
      ]
     },
     "execution_count": 145,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['0\\tgrocery_gourmet_food\\n',\n",
       " '1\\tmeat_poultry\\n',\n",
       " '2\\tjerky\\n',\n",
       " '3\\ttoys_games\\n',\n",
       " '4\\tgames\\n',\n",
       " '5\\tpuzzles\\n',\n",
       " '6\\tjigsaw_puzzles\\n',\n",
       " '7\\tboard_games\\n',\n",
       " '8\\tbeverages\\n',\n",
       " '9\\tjuices\\n',\n",
       " '10\\tbeauty\\n',\n",
       " '11\\tmakeup\\n',\n",
       " '12\\tnails\\n',\n",
       " '13\\tarts_crafts\\n',\n",
       " '14\\tdrawing_painting_supplies\\n',\n",
       " '15\\taction_toy_figures\\n',\n",
       " '16\\tfigures\\n',\n",
       " '17\\tdolls_accessories\\n',\n",
       " '18\\tdolls\\n',\n",
       " '19\\tcard_games\\n',\n",
       " '20\\tdrawing_sketching_tablets\\n',\n",
       " '21\\tbaby_toddler_toys\\n',\n",
       " '22\\tshape_sorters\\n',\n",
       " '23\\thealth_personal_care\\n',\n",
       " '24\\tpersonal_care\\n',\n",
       " '25\\tdeodorants_antiperspirants\\n',\n",
       " '26\\tnutrition_wellness\\n',\n",
       " '27\\tnutrition_bars_drinks\\n',\n",
       " '28\\tlearning_education\\n',\n",
       " '29\\thabitats\\n',\n",
       " '30\\telectronics_for_kids\\n',\n",
       " '31\\thousehold_supplies\\n',\n",
       " '32\\thousehold_batteries\\n',\n",
       " '33\\tpush_pull_toys\\n',\n",
       " '34\\tstuffed_animals_plush\\n',\n",
       " '35\\ttricycles\\n',\n",
       " '36\\tscooters_wagons\\n',\n",
       " '37\\tclay_dough\\n',\n",
       " '38\\thealth_care\\n',\n",
       " '39\\tallergy\\n',\n",
       " '40\\tbaby_products\\n',\n",
       " '41\\tgear\\n',\n",
       " '42\\tbaby_gyms_playmats\\n',\n",
       " '43\\tshaving_hair_removal\\n',\n",
       " '44\\tskin_care\\n',\n",
       " '45\\tface\\n',\n",
       " '46\\tanimals_figures\\n',\n",
       " '47\\tfeminine_care\\n',\n",
       " '48\\tmusic_sound\\n',\n",
       " '49\\toral_hygiene\\n',\n",
       " '50\\tgrown_up_toys\\n',\n",
       " '51\\tdress_up_pretend_play\\n',\n",
       " '52\\tpretend_play\\n',\n",
       " '53\\tnovelty_gag_toys\\n',\n",
       " '54\\tbath_body\\n',\n",
       " '55\\tcleansers\\n',\n",
       " '56\\tplaysets\\n',\n",
       " '57\\td_puzzles\\n',\n",
       " '58\\tdollhouses\\n',\n",
       " '59\\tlip_care_products\\n',\n",
       " '60\\ttools_accessories\\n',\n",
       " '61\\tnail_tools\\n',\n",
       " '62\\teye_care\\n',\n",
       " '63\\tpill_cases_splitters\\n',\n",
       " '64\\thair_care\\n',\n",
       " '65\\tstyling_products\\n',\n",
       " '66\\telectronic_toys\\n',\n",
       " '67\\tbody\\n',\n",
       " '68\\ttoy_balls\\n',\n",
       " '69\\teyes\\n',\n",
       " '70\\ttrading_card_games\\n',\n",
       " '71\\tfoot_care\\n',\n",
       " '72\\thands_nails\\n',\n",
       " '73\\tsun\\n',\n",
       " '74\\tmedical_supplies_equipment\\n',\n",
       " '75\\tdaily_living_aids\\n',\n",
       " '76\\tbaby_child_care\\n',\n",
       " '77\\tpaper_plastic\\n',\n",
       " '78\\tincontinence\\n',\n",
       " '79\\tshampoos\\n',\n",
       " '80\\tconditioners\\n',\n",
       " '81\\tmusic_players_karaoke\\n',\n",
       " '82\\tcough_cold\\n',\n",
       " '83\\tbath\\n',\n",
       " '84\\ttests\\n',\n",
       " '85\\tbuilding_toys\\n',\n",
       " '86\\tbuilding_sets\\n',\n",
       " '87\\tstress_reduction\\n',\n",
       " '88\\tfamily_planning_contraceptives\\n',\n",
       " '89\\tvitamins_supplements\\n',\n",
       " '90\\thair_color\\n',\n",
       " '91\\tpain_relievers\\n',\n",
       " '92\\tcotton_swabs\\n',\n",
       " '93\\tstyling_tools\\n',\n",
       " '94\\tfirst_aid\\n',\n",
       " '95\\tscrubs_body_treatments\\n',\n",
       " '96\\tcleaning_tools\\n',\n",
       " '97\\tpegged_puzzles\\n',\n",
       " '98\\tdiabetes\\n',\n",
       " '99\\tmagic_kits_accessories\\n',\n",
       " '100\\tgifts\\n',\n",
       " '101\\talbums\\n',\n",
       " '102\\tcrib_toys_attachments\\n',\n",
       " '103\\tdigestion_nausea\\n',\n",
       " '104\\telectronic_pets\\n',\n",
       " '105\\tsexual_wellness\\n',\n",
       " '106\\tsafer_sex\\n',\n",
       " '107\\tthermometers\\n',\n",
       " '108\\tstacking_nesting_toys\\n',\n",
       " '109\\tmakeup_remover\\n',\n",
       " '110\\ttemporary_tattoos\\n',\n",
       " '111\\tsports_outdoor_play\\n',\n",
       " '112\\tplay_tents_tunnels\\n',\n",
       " '113\\tscience\\n',\n",
       " '114\\tsports\\n',\n",
       " '115\\tbath_toys\\n',\n",
       " '116\\tpuppets\\n',\n",
       " '117\\tsystems_accessories\\n',\n",
       " '118\\thealth_monitors\\n',\n",
       " '119\\tinflatable_bouncers\\n',\n",
       " '120\\thobbies\\n',\n",
       " '121\\tmodel_building_kits_tools\\n',\n",
       " '122\\tblackboards_whiteboards\\n',\n",
       " '123\\tpools_water_fun\\n',\n",
       " '124\\trattles\\n',\n",
       " '125\\tsandboxes_accessories\\n',\n",
       " '126\\tactivity_play_centers\\n',\n",
       " '127\\tcar_seat_stroller_toys\\n',\n",
       " '128\\tfeeding\\n',\n",
       " '129\\tbottle_feeding\\n',\n",
       " '130\\tbreastfeeding\\n',\n",
       " '131\\tdiapering\\n',\n",
       " '132\\tdiaper_changing_kits\\n',\n",
       " '133\\tpuzzle_accessories\\n',\n",
       " '134\\tdiaper_pails_refills\\n',\n",
       " '135\\tsafety\\n',\n",
       " '136\\tbathroom_safety\\n',\n",
       " '137\\tmassage_relaxation\\n',\n",
       " '138\\tgates_doorways\\n',\n",
       " '139\\tnursery\\n',\n",
       " '140\\tfurniture\\n',\n",
       " '141\\tmonitors\\n',\n",
       " '142\\tcloth_diapers\\n',\n",
       " '143\\tplush_backpacks_purses\\n',\n",
       " '144\\tstatues\\n',\n",
       " '145\\tbathing_skin_care\\n',\n",
       " '146\\tbathing_tubs_seats\\n',\n",
       " '147\\tvehicles_remote_control\\n',\n",
       " '148\\tplay_vehicles\\n',\n",
       " '149\\tbackpacks_carriers\\n',\n",
       " '150\\tcraft_kits\\n',\n",
       " '151\\tcar_seats_accessories\\n',\n",
       " '152\\tcar_seats\\n',\n",
       " '153\\tnursery_d_cor\\n',\n",
       " '154\\thammering_pounding_toys\\n',\n",
       " '155\\tbedding\\n',\n",
       " '156\\tplay_trains_railway_sets\\n',\n",
       " '157\\trockets\\n',\n",
       " '158\\tstacking_blocks\\n',\n",
       " '159\\tdiaper_bags\\n',\n",
       " '160\\tstrollers\\n',\n",
       " '161\\tgym_sets_swings\\n',\n",
       " '162\\tpregnancy_maternity\\n',\n",
       " '163\\tmaternity_pillows\\n',\n",
       " '164\\trocking_spring_ride_ons\\n',\n",
       " '165\\tbraces\\n',\n",
       " '166\\taccessories\\n',\n",
       " '167\\tvehicle_playsets\\n',\n",
       " '168\\tdoll_accessories\\n',\n",
       " '169\\tpet_supplies\\n',\n",
       " '170\\tcats\\n',\n",
       " '171\\tlitter_housebreaking\\n',\n",
       " '172\\tspinning_tops\\n',\n",
       " '173\\tsets\\n',\n",
       " '174\\ttravel_games\\n',\n",
       " '175\\tpillows_stools\\n',\n",
       " '176\\tbattling_tops\\n',\n",
       " '177\\tcameras_camcorders\\n',\n",
       " '178\\tdance_mats\\n',\n",
       " '179\\tfood\\n',\n",
       " '180\\tradio_control\\n',\n",
       " '181\\tgrooming_healthcare_kits\\n',\n",
       " '182\\tballs\\n',\n",
       " '183\\ttile_games\\n',\n",
       " '184\\tpotty_training\\n',\n",
       " '185\\tpotties_seats\\n',\n",
       " '186\\thighchairs_booster_seats\\n',\n",
       " '187\\tstuffed_animals_toys\\n',\n",
       " '188\\tdvd_games\\n',\n",
       " '189\\tedge_corner_guards\\n',\n",
       " '190\\tbasic_life_skills_toys\\n',\n",
       " '191\\tactivity_centers_entertainers\\n',\n",
       " '192\\tthermometer_accessories\\n',\n",
       " '193\\twipes_holders\\n',\n",
       " '194\\tgift_sets\\n',\n",
       " '195\\tsolid_feeding\\n',\n",
       " '196\\tjoggers\\n',\n",
       " '197\\tfacial_steamers\\n',\n",
       " '198\\tkites_wind_spinners\\n',\n",
       " '199\\tdogs\\n',\n",
       " '200\\ttoys\\n',\n",
       " '201\\twalkers\\n',\n",
       " '202\\tslumber_bags\\n',\n",
       " '203\\tdie_cast_vehicles\\n',\n",
       " '204\\teasels\\n',\n",
       " '205\\tlips\\n',\n",
       " '206\\ttea\\n',\n",
       " '207\\treading_writing\\n',\n",
       " '208\\tgourmet_gifts\\n',\n",
       " '209\\tsnack_gifts\\n',\n",
       " '210\\tstacking_games\\n',\n",
       " '211\\tsauces_dips\\n',\n",
       " '212\\tsauces\\n',\n",
       " '213\\tbreakfast_foods\\n',\n",
       " '214\\tcereals\\n',\n",
       " '215\\tshopping_cart_covers\\n',\n",
       " '216\\tpantry_staples\\n',\n",
       " '217\\tscaled_model_vehicles\\n',\n",
       " '218\\tcooking_baking_supplies\\n',\n",
       " '219\\tpersonal_video_players_accessories\\n',\n",
       " '220\\tfragrance\\n',\n",
       " '221\\twomen_s\\n',\n",
       " '222\\tkeepsakes\\n',\n",
       " '223\\ttreats\\n',\n",
       " '224\\tswings\\n',\n",
       " '225\\ttrains_accessories\\n',\n",
       " '226\\tdisposable_diapers\\n',\n",
       " '227\\tplug_play_video_games\\n',\n",
       " '228\\tfloor_puzzles\\n',\n",
       " '229\\tfresh_flowers_live_indoor_plants\\n',\n",
       " '230\\tlive_indoor_plants\\n',\n",
       " '231\\tweight_loss_products\\n',\n",
       " '232\\tsmoking_cessation\\n',\n",
       " '233\\tbeauty_fashion\\n',\n",
       " '234\\tmirrors\\n',\n",
       " '235\\tcoffee\\n',\n",
       " '236\\tcabinet_locks_straps\\n',\n",
       " '237\\tplush_pillows\\n',\n",
       " '238\\tfloor_games\\n',\n",
       " '239\\tmakeup_brushes_tools\\n',\n",
       " '240\\talternative_medicine\\n',\n",
       " '241\\tchanging_table_pads_covers\\n',\n",
       " '242\\tmen_s\\n',\n",
       " '243\\tstep_stools\\n',\n",
       " '244\\trails_rail_guards\\n',\n",
       " '245\\tlaundry\\n',\n",
       " '246\\twomen_s_health\\n',\n",
       " '247\\tstandard\\n',\n",
       " '248\\tbeds_furniture\\n',\n",
       " '249\\therbs\\n',\n",
       " '250\\tsleep_positioners\\n',\n",
       " '251\\thealth_supplies\\n',\n",
       " '252\\tbreakfast_cereal_bars\\n',\n",
       " '253\\tbody_art\\n',\n",
       " '254\\tcondiments\\n',\n",
       " '255\\tbreads_bakery\\n',\n",
       " '256\\tcakes\\n',\n",
       " '257\\tdried_beans\\n',\n",
       " '258\\thousehold_cleaning\\n',\n",
       " '259\\tcollars\\n',\n",
       " '260\\teducational_repellents\\n',\n",
       " '261\\tdessert_gifts\\n',\n",
       " '262\\tadult_toys_games\\n',\n",
       " '263\\tteddy_bears\\n',\n",
       " '264\\ttherapeutic_skin_care\\n',\n",
       " '265\\tcandy_chocolate\\n',\n",
       " '266\\tchocolate_assortments\\n',\n",
       " '267\\tsand_water_tables\\n',\n",
       " '268\\tslot_cars\\n',\n",
       " '269\\tchocolate\\n',\n",
       " '270\\tsoft_drinks\\n',\n",
       " '271\\tsnack_food\\n',\n",
       " '272\\tchips_crisps\\n',\n",
       " '273\\tlicorice\\n',\n",
       " '274\\tfeeding_watering_supplies\\n',\n",
       " '275\\tblasters_foam_play\\n',\n",
       " '276\\tchocolate_bars\\n',\n",
       " '277\\tmeat_seafood\\n',\n",
       " '278\\twild_game_fowl\\n',\n",
       " '279\\tspices_seasonings\\n',\n",
       " '280\\tcookies\\n',\n",
       " '281\\ttraining_behavior_aids\\n',\n",
       " '282\\tgardening_tools\\n',\n",
       " '283\\tbathroom_aids_safety\\n',\n",
       " '284\\tpogo_sticks_hoppers\\n',\n",
       " '285\\tpowdered_drink_mixes\\n',\n",
       " '286\\tplayards\\n',\n",
       " '287\\tgag_toys_practical_jokes\\n',\n",
       " '288\\tbaby_food\\n',\n",
       " '289\\tbaby_formula\\n',\n",
       " '290\\tlighters\\n',\n",
       " '291\\tdried_fruit_raisins\\n',\n",
       " '292\\tmoney_banks\\n',\n",
       " '293\\tmarble_runs\\n',\n",
       " '294\\tgame_collections\\n',\n",
       " '295\\tkitchen_safety\\n',\n",
       " '296\\tfish_aquatic_pets\\n',\n",
       " '297\\tgum\\n',\n",
       " '298\\toutdoor_safety\\n',\n",
       " '299\\thair_nails\\n',\n",
       " '300\\taquarium_lights\\n',\n",
       " '301\\tblocks\\n',\n",
       " '302\\ttandem\\n',\n",
       " '303\\toccupational_physical_therapy_aids\\n',\n",
       " '304\\tpackaged_meals_side_dishes\\n',\n",
       " '305\\tindoor_climbers_play_structures\\n',\n",
       " '306\\tpumps_filters\\n',\n",
       " '307\\tbeds_accessories\\n',\n",
       " '308\\tenergy_drinks\\n',\n",
       " '309\\tsleep_snoring\\n',\n",
       " '310\\tgeography\\n',\n",
       " '311\\tsmall_animals\\n',\n",
       " '312\\thouses_habitats\\n',\n",
       " '313\\tdairy_eggs\\n',\n",
       " '314\\tcheese\\n',\n",
       " '315\\ttravel_systems\\n',\n",
       " '316\\twalkie_talkies\\n',\n",
       " '317\\tmobility_aids_equipment\\n',\n",
       " '318\\tsexual_enhancers\\n',\n",
       " '319\\tdips\\n',\n",
       " '320\\tdollhouse_accessories\\n',\n",
       " '321\\tparty_mix\\n',\n",
       " '322\\tbathing_accessories\\n',\n",
       " '323\\tgrooming\\n',\n",
       " '324\\tbaby_seats\\n',\n",
       " '325\\twind_up_toys\\n',\n",
       " '326\\tsensual_delights\\n',\n",
       " '327\\thot_cocoa\\n',\n",
       " '328\\tdishwashing\\n',\n",
       " '329\\tcarriers_strollers\\n',\n",
       " '330\\tflash_cards\\n',\n",
       " '331\\tbrain_teasers\\n',\n",
       " '332\\tnesting_dolls\\n',\n",
       " '333\\ttest_kits\\n',\n",
       " '334\\tlightweight\\n',\n",
       " '335\\thair_loss_products\\n',\n",
       " '336\\twater_treatments\\n',\n",
       " '337\\tbirds\\n',\n",
       " '338\\thair_scalp_treatments\\n',\n",
       " '339\\tcages_accessories\\n',\n",
       " '340\\tgummy_candies\\n',\n",
       " '341\\tgummy_candy\\n',\n",
       " '342\\tjerky_dried_meats\\n',\n",
       " '343\\thouses\\n',\n",
       " '344\\tchocolate_truffles\\n',\n",
       " '345\\tear_care\\n',\n",
       " '346\\tmilk\\n',\n",
       " '347\\tpizza_crusts\\n',\n",
       " '348\\tfresh_baked_cookies\\n',\n",
       " '349\\thard_candies\\n',\n",
       " '350\\tsports_supplements\\n',\n",
       " '351\\tbaking_mixes\\n',\n",
       " '352\\tcrackers\\n',\n",
       " '353\\tpork_rinds\\n',\n",
       " '354\\tpasta_noodles\\n',\n",
       " '355\\ttrail_mix\\n',\n",
       " '356\\tcarriers_travel_products\\n',\n",
       " '357\\tproduce\\n',\n",
       " '358\\tfresh_fruits\\n',\n",
       " '359\\ttoaster_pastries\\n',\n",
       " '360\\trice_cakes\\n',\n",
       " '361\\tchips\\n',\n",
       " '362\\tpuffed_snacks\\n',\n",
       " '363\\tpretzels\\n',\n",
       " '364\\tmathematics_counting\\n',\n",
       " '365\\tsuckers_lollipops\\n',\n",
       " '366\\tpopcorn\\n',\n",
       " '367\\ttoy_banks\\n',\n",
       " '368\\ttraining_pants\\n',\n",
       " '369\\ttea_gifts\\n',\n",
       " '370\\toils\\n',\n",
       " '371\\taquarium_hoods\\n',\n",
       " '372\\ttortillas\\n',\n",
       " '373\\tcheese_gifts\\n',\n",
       " '374\\tdoors\\n',\n",
       " '375\\tstandard_playing_card_decks\\n',\n",
       " '376\\tfudge\\n',\n",
       " '377\\tsyrups\\n',\n",
       " '378\\tprinting_stamping\\n',\n",
       " '379\\ttoy_gift_sets\\n',\n",
       " '380\\tcanned_jarred_food\\n',\n",
       " '381\\tfresh_vegetables\\n',\n",
       " '382\\tapparel_accessories\\n',\n",
       " '383\\tcandy_gifts\\n',\n",
       " '384\\tchewing_gum\\n',\n",
       " '385\\tpuzzle_play_mats\\n',\n",
       " '386\\telectrical_safety\\n',\n",
       " '387\\tsugars_sweeteners\\n',\n",
       " '388\\tmarble_games\\n',\n",
       " '389\\tminiatures\\n',\n",
       " '390\\tfinger_boards_finger_bikes\\n',\n",
       " '391\\tcoconut_water\\n',\n",
       " '392\\thandheld_games\\n',\n",
       " '393\\tslime_putty_toys\\n',\n",
       " '394\\tpastries\\n',\n",
       " '395\\thealth_baby_care\\n',\n",
       " '396\\tteethers\\n',\n",
       " '397\\tbutter\\n',\n",
       " '398\\tfruits\\n',\n",
       " '399\\tchocolate_pretzels\\n',\n",
       " '400\\tbreakfast_bakery\\n',\n",
       " '401\\tstickers\\n',\n",
       " '402\\tsoaps_cleansers\\n',\n",
       " '403\\tsauces_gifts\\n',\n",
       " '404\\tfitness_equipment\\n',\n",
       " '405\\twater\\n',\n",
       " '406\\tportable_changing_pads\\n',\n",
       " '407\\tdice_gaming_dice\\n',\n",
       " '408\\tpacifiers_accessories\\n',\n",
       " '409\\tbars\\n',\n",
       " '410\\tcocktail_mixers\\n',\n",
       " '411\\taquariums\\n',\n",
       " '412\\tball_pits_accessories\\n',\n",
       " '413\\tseafood\\n',\n",
       " '414\\tbags_cases\\n',\n",
       " '415\\tjelly_beans\\n',\n",
       " '416\\tnovelty_spinning_tops\\n',\n",
       " '417\\tautomatic_feeders\\n',\n",
       " '418\\tmints\\n',\n",
       " '419\\tmakeup_sets\\n',\n",
       " '420\\tcleaners\\n',\n",
       " '421\\tfresh_cut_flowers\\n',\n",
       " '422\\tjams\\n',\n",
       " '423\\tprams\\n',\n",
       " '424\\tnuts_seeds\\n',\n",
       " '425\\ttaffy\\n',\n",
       " '426\\tbunny_rabbit_central\\n',\n",
       " '427\\trabbit_hutches\\n',\n",
       " '428\\taquarium_d_cor\\n',\n",
       " '429\\tgranola_bars\\n',\n",
       " '430\\tviewfinders\\n',\n",
       " '431\\tharnesses_leashes\\n',\n",
       " '432\\tfoie_gras_p_t_s\\n',\n",
       " '433\\tgame_accessories\\n',\n",
       " '434\\tp_t_s\\n',\n",
       " '435\\tgame_room_games\\n',\n",
       " '436\\tcages\\n',\n",
       " '437\\tnon_slip_bath_mats\\n',\n",
       " '438\\thalva\\n',\n",
       " '439\\tseafood_gifts\\n',\n",
       " '440\\tstimulants\\n',\n",
       " '441\\tbeanbags_foot_bags\\n',\n",
       " '442\\tshampoo_conditioner_sets\\n',\n",
       " '443\\tnut_clusters\\n',\n",
       " '444\\traisins\\n',\n",
       " '445\\tbreadcrumbs\\n',\n",
       " '446\\textracts_flavoring\\n',\n",
       " '447\\tplush_puppets\\n',\n",
       " '448\\tshampoo_plus_conditioner\\n',\n",
       " '449\\tsalsas\\n',\n",
       " '450\\tmemorials\\n',\n",
       " '451\\tdie_cast_toy_vehicles\\n',\n",
       " '452\\taquarium_starter_kits\\n',\n",
       " '453\\tcoffee_gifts\\n',\n",
       " '454\\tair_fresheners\\n',\n",
       " '455\\tfruit_leather\\n',\n",
       " '456\\tgranola_trail_mix_bars\\n',\n",
       " '457\\tsugar_substitutes\\n',\n",
       " '458\\tbacon\\n',\n",
       " '459\\tcat_flaps\\n',\n",
       " '460\\taquarium_heaters\\n',\n",
       " '461\\thair_relaxers\\n',\n",
       " '462\\tbreads\\n',\n",
       " '463\\tpackaged_breads\\n',\n",
       " '464\\tsausages\\n',\n",
       " '465\\tdessert_toppings\\n',\n",
       " '466\\tdiaper_stackers_caddies\\n',\n",
       " '467\\tprisms_kaleidoscopes\\n',\n",
       " '468\\tmaternity\\n',\n",
       " '469\\tcrackers_biscuits\\n',\n",
       " '470\\tcoin_collecting\\n',\n",
       " '471\\tchocolate_gifts\\n',\n",
       " '472\\tkickball_playground_balls\\n',\n",
       " '473\\thair_perms_texturizers\\n',\n",
       " '474\\tyo_yos\\n',\n",
       " '475\\tflours_meals\\n',\n",
       " '476\\tbeef\\n',\n",
       " '477\\tmolding_sculpting_sticks\\n',\n",
       " '478\\twashcloths_towels\\n',\n",
       " '479\\tfruit_gifts\\n',\n",
       " '480\\tstuffing\\n',\n",
       " '481\\tbaking_powder\\n',\n",
       " '482\\tcereal\\n',\n",
       " '483\\texotic_meats\\n',\n",
       " '484\\tbreadsticks\\n',\n",
       " '485\\teggs\\n',\n",
       " '486\\tcloth_diaper_accessories\\n',\n",
       " '487\\tcarriers\\n',\n",
       " '488\\ttoffee\\n',\n",
       " '489\\thair_coloring_tools\\n',\n",
       " '490\\tchocolate_covered_fruit\\n',\n",
       " '491\\tcaramels\\n',\n",
       " '492\\tassortments\\n',\n",
       " '493\\taromatherapy\\n',\n",
       " '494\\tseat_covers\\n',\n",
       " '495\\tbondage_gear_accessories\\n',\n",
       " '496\\tsun_protection\\n',\n",
       " '497\\tdinners\\n',\n",
       " '498\\tfruit\\n',\n",
       " '499\\taquarium_stands\\n',\n",
       " '500\\tteaching_clocks\\n',\n",
       " '501\\tmilk_substitutes\\n',\n",
       " '502\\tbubble_bath\\n',\n",
       " '503\\tnovelties\\n',\n",
       " '504\\tjams_preserves_gifts\\n',\n",
       " '505\\tmeat_gifts\\n',\n",
       " '506\\tbeads\\n',\n",
       " '507\\tfish_bowls\\n',\n",
       " '508\\todor_stain_removers\\n',\n",
       " '509\\tfood_coloring\\n',\n",
       " '510\\tchildren_s\\n',\n",
       " '511\\tice_cream_frozen_desserts\\n',\n",
       " '512\\tpastry_decorations\\n',\n",
       " '513\\tchicken\\n',\n",
       " '514\\tsports_drinks\\n',\n",
       " '515\\tmarshmallows\\n',\n",
       " '516\\tpudding\\n',\n",
       " '517\\taprons_smocks\\n',\n",
       " '518\\telectronics\\n',\n",
       " '519\\tsex_furniture\\n',\n",
       " '520\\tpork\\n',\n",
       " '521\\tspices_gifts\\n',\n",
       " '522\\tdried_fruit\\n',\n",
       " '523\\tflying_toys\\n',\n",
       " '524\\tshampoo\\n',\n",
       " '525\\tcoatings_batters\\n',\n",
       " '526\\thydrometers\\n',\n",
       " '527\\tlamb\\n',\n",
       " '528\\texercise_wheels\\n',\n",
       " '529\\tchocolate_covered_nuts\\n',\n",
       " '530\\tbreeding_tanks\\n']"
      ]
     },
     "execution_count": 140,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the raw label lines; each entry keeps its '<index>\\t<label>\\n' form.\n",
    "# The context manager closes the handle as soon as the read finishes,\n",
    "# instead of leaving it open for the lifetime of the kernel.\n",
    "with open('../../datasets/Amazon-531/test/labels.txt', 'r') as file2:\n",
    "    test_label_set = file2.readlines()\n",
    "test_label_set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['grocery_gourmet_food',\n",
       " 'meat_poultry',\n",
       " 'jerky',\n",
       " 'toys_games',\n",
       " 'games',\n",
       " 'puzzles',\n",
       " 'jigsaw_puzzles',\n",
       " 'board_games',\n",
       " 'beverages',\n",
       " 'juices',\n",
       " 'beauty',\n",
       " 'makeup',\n",
       " 'nails',\n",
       " 'arts_crafts',\n",
       " 'drawing_painting_supplies',\n",
       " 'action_toy_figures',\n",
       " 'figures',\n",
       " 'dolls_accessories',\n",
       " 'dolls',\n",
       " 'card_games',\n",
       " 'drawing_sketching_tablets',\n",
       " 'baby_toddler_toys',\n",
       " 'shape_sorters',\n",
       " 'health_personal_care',\n",
       " 'personal_care',\n",
       " 'deodorants_antiperspirants',\n",
       " 'nutrition_wellness',\n",
       " 'nutrition_bars_drinks',\n",
       " 'learning_education',\n",
       " 'habitats',\n",
       " 'electronics_for_kids',\n",
       " 'household_supplies',\n",
       " 'household_batteries',\n",
       " 'push_pull_toys',\n",
       " 'stuffed_animals_plush',\n",
       " 'tricycles',\n",
       " 'scooters_wagons',\n",
       " 'clay_dough',\n",
       " 'health_care',\n",
       " 'allergy',\n",
       " 'baby_products',\n",
       " 'gear',\n",
       " 'baby_gyms_playmats',\n",
       " 'shaving_hair_removal',\n",
       " 'skin_care',\n",
       " 'face',\n",
       " 'animals_figures',\n",
       " 'feminine_care',\n",
       " 'music_sound',\n",
       " 'oral_hygiene',\n",
       " 'grown_up_toys',\n",
       " 'dress_up_pretend_play',\n",
       " 'pretend_play',\n",
       " 'novelty_gag_toys',\n",
       " 'bath_body',\n",
       " 'cleansers',\n",
       " 'playsets',\n",
       " 'd_puzzles',\n",
       " 'dollhouses',\n",
       " 'lip_care_products',\n",
       " 'tools_accessories',\n",
       " 'nail_tools',\n",
       " 'eye_care',\n",
       " 'pill_cases_splitters',\n",
       " 'hair_care',\n",
       " 'styling_products',\n",
       " 'electronic_toys',\n",
       " 'body',\n",
       " 'toy_balls',\n",
       " 'eyes',\n",
       " 'trading_card_games',\n",
       " 'foot_care',\n",
       " 'hands_nails',\n",
       " 'sun',\n",
       " 'medical_supplies_equipment',\n",
       " 'daily_living_aids',\n",
       " 'baby_child_care',\n",
       " 'paper_plastic',\n",
       " 'incontinence',\n",
       " 'shampoos',\n",
       " 'conditioners',\n",
       " 'music_players_karaoke',\n",
       " 'cough_cold',\n",
       " 'bath',\n",
       " 'tests',\n",
       " 'building_toys',\n",
       " 'building_sets',\n",
       " 'stress_reduction',\n",
       " 'family_planning_contraceptives',\n",
       " 'vitamins_supplements',\n",
       " 'hair_color',\n",
       " 'pain_relievers',\n",
       " 'cotton_swabs',\n",
       " 'styling_tools',\n",
       " 'first_aid',\n",
       " 'scrubs_body_treatments',\n",
       " 'cleaning_tools',\n",
       " 'pegged_puzzles',\n",
       " 'diabetes',\n",
       " 'magic_kits_accessories',\n",
       " 'gifts',\n",
       " 'albums',\n",
       " 'crib_toys_attachments',\n",
       " 'digestion_nausea',\n",
       " 'electronic_pets',\n",
       " 'sexual_wellness',\n",
       " 'safer_sex',\n",
       " 'thermometers',\n",
       " 'stacking_nesting_toys',\n",
       " 'makeup_remover',\n",
       " 'temporary_tattoos',\n",
       " 'sports_outdoor_play',\n",
       " 'play_tents_tunnels',\n",
       " 'science',\n",
       " 'sports',\n",
       " 'bath_toys',\n",
       " 'puppets',\n",
       " 'systems_accessories',\n",
       " 'health_monitors',\n",
       " 'inflatable_bouncers',\n",
       " 'hobbies',\n",
       " 'model_building_kits_tools',\n",
       " 'blackboards_whiteboards',\n",
       " 'pools_water_fun',\n",
       " 'rattles',\n",
       " 'sandboxes_accessories',\n",
       " 'activity_play_centers',\n",
       " 'car_seat_stroller_toys',\n",
       " 'feeding',\n",
       " 'bottle_feeding',\n",
       " 'breastfeeding',\n",
       " 'diapering',\n",
       " 'diaper_changing_kits',\n",
       " 'puzzle_accessories',\n",
       " 'diaper_pails_refills',\n",
       " 'safety',\n",
       " 'bathroom_safety',\n",
       " 'massage_relaxation',\n",
       " 'gates_doorways',\n",
       " 'nursery',\n",
       " 'furniture',\n",
       " 'monitors',\n",
       " 'cloth_diapers',\n",
       " 'plush_backpacks_purses',\n",
       " 'statues',\n",
       " 'bathing_skin_care',\n",
       " 'bathing_tubs_seats',\n",
       " 'vehicles_remote_control',\n",
       " 'play_vehicles',\n",
       " 'backpacks_carriers',\n",
       " 'craft_kits',\n",
       " 'car_seats_accessories',\n",
       " 'car_seats',\n",
       " 'nursery_d_cor',\n",
       " 'hammering_pounding_toys',\n",
       " 'bedding',\n",
       " 'play_trains_railway_sets',\n",
       " 'rockets',\n",
       " 'stacking_blocks',\n",
       " 'diaper_bags',\n",
       " 'strollers',\n",
       " 'gym_sets_swings',\n",
       " 'pregnancy_maternity',\n",
       " 'maternity_pillows',\n",
       " 'rocking_spring_ride_ons',\n",
       " 'braces',\n",
       " 'accessories',\n",
       " 'vehicle_playsets',\n",
       " 'doll_accessories',\n",
       " 'pet_supplies',\n",
       " 'cats',\n",
       " 'litter_housebreaking',\n",
       " 'spinning_tops',\n",
       " 'sets',\n",
       " 'travel_games',\n",
       " 'pillows_stools',\n",
       " 'battling_tops',\n",
       " 'cameras_camcorders',\n",
       " 'dance_mats',\n",
       " 'food',\n",
       " 'radio_control',\n",
       " 'grooming_healthcare_kits',\n",
       " 'balls',\n",
       " 'tile_games',\n",
       " 'potty_training',\n",
       " 'potties_seats',\n",
       " 'highchairs_booster_seats',\n",
       " 'stuffed_animals_toys',\n",
       " 'dvd_games',\n",
       " 'edge_corner_guards',\n",
       " 'basic_life_skills_toys',\n",
       " 'activity_centers_entertainers',\n",
       " 'thermometer_accessories',\n",
       " 'wipes_holders',\n",
       " 'gift_sets',\n",
       " 'solid_feeding',\n",
       " 'joggers',\n",
       " 'facial_steamers',\n",
       " 'kites_wind_spinners',\n",
       " 'dogs',\n",
       " 'toys',\n",
       " 'walkers',\n",
       " 'slumber_bags',\n",
       " 'die_cast_vehicles',\n",
       " 'easels',\n",
       " 'lips',\n",
       " 'tea',\n",
       " 'reading_writing',\n",
       " 'gourmet_gifts',\n",
       " 'snack_gifts',\n",
       " 'stacking_games',\n",
       " 'sauces_dips',\n",
       " 'sauces',\n",
       " 'breakfast_foods',\n",
       " 'cereals',\n",
       " 'shopping_cart_covers',\n",
       " 'pantry_staples',\n",
       " 'scaled_model_vehicles',\n",
       " 'cooking_baking_supplies',\n",
       " 'personal_video_players_accessories',\n",
       " 'fragrance',\n",
       " 'women_s',\n",
       " 'keepsakes',\n",
       " 'treats',\n",
       " 'swings',\n",
       " 'trains_accessories',\n",
       " 'disposable_diapers',\n",
       " 'plug_play_video_games',\n",
       " 'floor_puzzles',\n",
       " 'fresh_flowers_live_indoor_plants',\n",
       " 'live_indoor_plants',\n",
       " 'weight_loss_products',\n",
       " 'smoking_cessation',\n",
       " 'beauty_fashion',\n",
       " 'mirrors',\n",
       " 'coffee',\n",
       " 'cabinet_locks_straps',\n",
       " 'plush_pillows',\n",
       " 'floor_games',\n",
       " 'makeup_brushes_tools',\n",
       " 'alternative_medicine',\n",
       " 'changing_table_pads_covers',\n",
       " 'men_s',\n",
       " 'step_stools',\n",
       " 'rails_rail_guards',\n",
       " 'laundry',\n",
       " 'women_s_health',\n",
       " 'standard',\n",
       " 'beds_furniture',\n",
       " 'herbs',\n",
       " 'sleep_positioners',\n",
       " 'health_supplies',\n",
       " 'breakfast_cereal_bars',\n",
       " 'body_art',\n",
       " 'condiments',\n",
       " 'breads_bakery',\n",
       " 'cakes',\n",
       " 'dried_beans',\n",
       " 'household_cleaning',\n",
       " 'collars',\n",
       " 'educational_repellents',\n",
       " 'dessert_gifts',\n",
       " 'adult_toys_games',\n",
       " 'teddy_bears',\n",
       " 'therapeutic_skin_care',\n",
       " 'candy_chocolate',\n",
       " 'chocolate_assortments',\n",
       " 'sand_water_tables',\n",
       " 'slot_cars',\n",
       " 'chocolate',\n",
       " 'soft_drinks',\n",
       " 'snack_food',\n",
       " 'chips_crisps',\n",
       " 'licorice',\n",
       " 'feeding_watering_supplies',\n",
       " 'blasters_foam_play',\n",
       " 'chocolate_bars',\n",
       " 'meat_seafood',\n",
       " 'wild_game_fowl',\n",
       " 'spices_seasonings',\n",
       " 'cookies',\n",
       " 'training_behavior_aids',\n",
       " 'gardening_tools',\n",
       " 'bathroom_aids_safety',\n",
       " 'pogo_sticks_hoppers',\n",
       " 'powdered_drink_mixes',\n",
       " 'playards',\n",
       " 'gag_toys_practical_jokes',\n",
       " 'baby_food',\n",
       " 'baby_formula',\n",
       " 'lighters',\n",
       " 'dried_fruit_raisins',\n",
       " 'money_banks',\n",
       " 'marble_runs',\n",
       " 'game_collections',\n",
       " 'kitchen_safety',\n",
       " 'fish_aquatic_pets',\n",
       " 'gum',\n",
       " 'outdoor_safety',\n",
       " 'hair_nails',\n",
       " 'aquarium_lights',\n",
       " 'blocks',\n",
       " 'tandem',\n",
       " 'occupational_physical_therapy_aids',\n",
       " 'packaged_meals_side_dishes',\n",
       " 'indoor_climbers_play_structures',\n",
       " 'pumps_filters',\n",
       " 'beds_accessories',\n",
       " 'energy_drinks',\n",
       " 'sleep_snoring',\n",
       " 'geography',\n",
       " 'small_animals',\n",
       " 'houses_habitats',\n",
       " 'dairy_eggs',\n",
       " 'cheese',\n",
       " 'travel_systems',\n",
       " 'walkie_talkies',\n",
       " 'mobility_aids_equipment',\n",
       " 'sexual_enhancers',\n",
       " 'dips',\n",
       " 'dollhouse_accessories',\n",
       " 'party_mix',\n",
       " 'bathing_accessories',\n",
       " 'grooming',\n",
       " 'baby_seats',\n",
       " 'wind_up_toys',\n",
       " 'sensual_delights',\n",
       " 'hot_cocoa',\n",
       " 'dishwashing',\n",
       " 'carriers_strollers',\n",
       " 'flash_cards',\n",
       " 'brain_teasers',\n",
       " 'nesting_dolls',\n",
       " 'test_kits',\n",
       " 'lightweight',\n",
       " 'hair_loss_products',\n",
       " 'water_treatments',\n",
       " 'birds',\n",
       " 'hair_scalp_treatments',\n",
       " 'cages_accessories',\n",
       " 'gummy_candies',\n",
       " 'gummy_candy',\n",
       " 'jerky_dried_meats',\n",
       " 'houses',\n",
       " 'chocolate_truffles',\n",
       " 'ear_care',\n",
       " 'milk',\n",
       " 'pizza_crusts',\n",
       " 'fresh_baked_cookies',\n",
       " 'hard_candies',\n",
       " 'sports_supplements',\n",
       " 'baking_mixes',\n",
       " 'crackers',\n",
       " 'pork_rinds',\n",
       " 'pasta_noodles',\n",
       " 'trail_mix',\n",
       " 'carriers_travel_products',\n",
       " 'produce',\n",
       " 'fresh_fruits',\n",
       " 'toaster_pastries',\n",
       " 'rice_cakes',\n",
       " 'chips',\n",
       " 'puffed_snacks',\n",
       " 'pretzels',\n",
       " 'mathematics_counting',\n",
       " 'suckers_lollipops',\n",
       " 'popcorn',\n",
       " 'toy_banks',\n",
       " 'training_pants',\n",
       " 'tea_gifts',\n",
       " 'oils',\n",
       " 'aquarium_hoods',\n",
       " 'tortillas',\n",
       " 'cheese_gifts',\n",
       " 'doors',\n",
       " 'standard_playing_card_decks',\n",
       " 'fudge',\n",
       " 'syrups',\n",
       " 'printing_stamping',\n",
       " 'toy_gift_sets',\n",
       " 'canned_jarred_food',\n",
       " 'fresh_vegetables',\n",
       " 'apparel_accessories',\n",
       " 'candy_gifts',\n",
       " 'chewing_gum',\n",
       " 'puzzle_play_mats',\n",
       " 'electrical_safety',\n",
       " 'sugars_sweeteners',\n",
       " 'marble_games',\n",
       " 'miniatures',\n",
       " 'finger_boards_finger_bikes',\n",
       " 'coconut_water',\n",
       " 'handheld_games',\n",
       " 'slime_putty_toys',\n",
       " 'pastries',\n",
       " 'health_baby_care',\n",
       " 'teethers',\n",
       " 'butter',\n",
       " 'fruits',\n",
       " 'chocolate_pretzels',\n",
       " 'breakfast_bakery',\n",
       " 'stickers',\n",
       " 'soaps_cleansers',\n",
       " 'sauces_gifts',\n",
       " 'fitness_equipment',\n",
       " 'water',\n",
       " 'portable_changing_pads',\n",
       " 'dice_gaming_dice',\n",
       " 'pacifiers_accessories',\n",
       " 'bars',\n",
       " 'cocktail_mixers',\n",
       " 'aquariums',\n",
       " 'ball_pits_accessories',\n",
       " 'seafood',\n",
       " 'bags_cases',\n",
       " 'jelly_beans',\n",
       " 'novelty_spinning_tops',\n",
       " 'automatic_feeders',\n",
       " 'mints',\n",
       " 'makeup_sets',\n",
       " 'cleaners',\n",
       " 'fresh_cut_flowers',\n",
       " 'jams',\n",
       " 'prams',\n",
       " 'nuts_seeds',\n",
       " 'taffy',\n",
       " 'bunny_rabbit_central',\n",
       " 'rabbit_hutches',\n",
       " 'aquarium_d_cor',\n",
       " 'granola_bars',\n",
       " 'viewfinders',\n",
       " 'harnesses_leashes',\n",
       " 'foie_gras_p_t_s',\n",
       " 'game_accessories',\n",
       " 'p_t_s',\n",
       " 'game_room_games',\n",
       " 'cages',\n",
       " 'non_slip_bath_mats',\n",
       " 'halva',\n",
       " 'seafood_gifts',\n",
       " 'stimulants',\n",
       " 'beanbags_foot_bags',\n",
       " 'shampoo_conditioner_sets',\n",
       " 'nut_clusters',\n",
       " 'raisins',\n",
       " 'breadcrumbs',\n",
       " 'extracts_flavoring',\n",
       " 'plush_puppets',\n",
       " 'shampoo_plus_conditioner',\n",
       " 'salsas',\n",
       " 'memorials',\n",
       " 'die_cast_toy_vehicles',\n",
       " 'aquarium_starter_kits',\n",
       " 'coffee_gifts',\n",
       " 'air_fresheners',\n",
       " 'fruit_leather',\n",
       " 'granola_trail_mix_bars',\n",
       " 'sugar_substitutes',\n",
       " 'bacon',\n",
       " 'cat_flaps',\n",
       " 'aquarium_heaters',\n",
       " 'hair_relaxers',\n",
       " 'breads',\n",
       " 'packaged_breads',\n",
       " 'sausages',\n",
       " 'dessert_toppings',\n",
       " 'diaper_stackers_caddies',\n",
       " 'prisms_kaleidoscopes',\n",
       " 'maternity',\n",
       " 'crackers_biscuits',\n",
       " 'coin_collecting',\n",
       " 'chocolate_gifts',\n",
       " 'kickball_playground_balls',\n",
       " 'hair_perms_texturizers',\n",
       " 'yo_yos',\n",
       " 'flours_meals',\n",
       " 'beef',\n",
       " 'molding_sculpting_sticks',\n",
       " 'washcloths_towels',\n",
       " 'fruit_gifts',\n",
       " 'stuffing',\n",
       " 'baking_powder',\n",
       " 'cereal',\n",
       " 'exotic_meats',\n",
       " 'breadsticks',\n",
       " 'eggs',\n",
       " 'cloth_diaper_accessories',\n",
       " 'carriers',\n",
       " 'toffee',\n",
       " 'hair_coloring_tools',\n",
       " 'chocolate_covered_fruit',\n",
       " 'caramels',\n",
       " 'assortments',\n",
       " 'aromatherapy',\n",
       " 'seat_covers',\n",
       " 'bondage_gear_accessories',\n",
       " 'sun_protection',\n",
       " 'dinners',\n",
       " 'fruit',\n",
       " 'aquarium_stands',\n",
       " 'teaching_clocks',\n",
       " 'milk_substitutes',\n",
       " 'bubble_bath',\n",
       " 'novelties',\n",
       " 'jams_preserves_gifts',\n",
       " 'meat_gifts',\n",
       " 'beads',\n",
       " 'fish_bowls',\n",
       " 'odor_stain_removers',\n",
       " 'food_coloring',\n",
       " 'children_s',\n",
       " 'ice_cream_frozen_desserts',\n",
       " 'pastry_decorations',\n",
       " 'chicken',\n",
       " 'sports_drinks',\n",
       " 'marshmallows',\n",
       " 'pudding',\n",
       " 'aprons_smocks',\n",
       " 'electronics',\n",
       " 'sex_furniture',\n",
       " 'pork',\n",
       " 'spices_gifts',\n",
       " 'dried_fruit',\n",
       " 'flying_toys',\n",
       " 'shampoo',\n",
       " 'coatings_batters',\n",
       " 'hydrometers',\n",
       " 'lamb',\n",
       " 'exercise_wheels',\n",
       " 'chocolate_covered_nuts',\n",
       " 'breeding_tanks']"
      ]
     },
     "execution_count": 141,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "true_labels = []\n",
    "for label in test_label_set:\n",
    "    true_labels.append(label.strip().split('\\t')[1])\n",
    "true_labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the raw test-label lines. Read from the handle opened in this cell\n",
    "# and let the context manager close it once the lines are in memory.\n",
    "with open('../../datasets/Amazon-531/test_label.txt', 'r') as file3:\n",
    "    test_label_set = file3.readlines()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sentence_transformers import SentenceTransformer, util\n",
    "\n",
    "# Load the all-MiniLM-L6-v2 checkpoint onto device index 0.\n",
    "model = SentenceTransformer(\n",
    "    'sentence-transformers/all-MiniLM-L6-v2',\n",
    "    device=0,\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Document</th>\n",
       "      <th>Topic</th>\n",
       "      <th>Name</th>\n",
       "      <th>Representation</th>\n",
       "      <th>Representative_Docs</th>\n",
       "      <th>Top_n_words</th>\n",
       "      <th>Representative_document</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ASIAN EXPORTERS FEAR DAMAGE FROM U.S.-JAPAN RI...</td>\n",
       "      <td>21</td>\n",
       "      <td>exports trade</td>\n",
       "      <td>[ec, trade, steel, japan, coffee, exports, jap...</td>\n",
       "      <td>[ASIAN EXPORTERS FEAR DAMAGE FROM U.S.-JAPAN R...</td>\n",
       "      <td>ec - trade - steel - japan - coffee - exports ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>CHINA DAILY SAYS VERMIN EAT 7-12 PCT GRAIN STO...</td>\n",
       "      <td>18</td>\n",
       "      <td>commodities agriculture</td>\n",
       "      <td>[tonnes, grain, wheat, storage, crop, 000 tonn...</td>\n",
       "      <td>[BRAZIL GRAIN HARVEST FACES STORAGE PROBLEMS  ...</td>\n",
       "      <td>tonnes - grain - wheat - storage - crop - 000 ...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>JAPAN TO REVISE LONG-TERM ENERGY DEMAND DOWNWA...</td>\n",
       "      <td>97</td>\n",
       "      <td>energy</td>\n",
       "      <td>[energy, miti, demand, revise, natural, power,...</td>\n",
       "      <td>[JAPAN TO REVISE LONG-TERM ENERGY DEMAND DOWNW...</td>\n",
       "      <td>energy - miti - demand - revise - natural - po...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>THAI TRADE DEFICIT WIDENS IN FIRST QUARTER  Th...</td>\n",
       "      <td>3</td>\n",
       "      <td>agriculture exports</td>\n",
       "      <td>[nil, nil nil, 87, tonnes, 1986 87, 1987 88, s...</td>\n",
       "      <td>[SOYBEAN SUPPLY/DEMAND BY COUNTRY -- USDA  The...</td>\n",
       "      <td>nil - nil nil - 87 - tonnes - 1986 87 - 1987 8...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>INDONESIA SEES CPO PRICE RISING SHARPLY  Indon...</td>\n",
       "      <td>4</td>\n",
       "      <td>commodity prices</td>\n",
       "      <td>[prices, futures, price, traders, palm, rise, ...</td>\n",
       "      <td>[U.K. PRODUCER PRICES SEEN MOVED BY TECHNICALI...</td>\n",
       "      <td>prices - futures - price - traders - palm - ri...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>995</th>\n",
       "      <td>HONEYWELL INC &amp;lt;HON&gt; 1ST QTR OPER NET  Oper ...</td>\n",
       "      <td>20</td>\n",
       "      <td>earnings mergers</td>\n",
       "      <td>[vs, net, shr, qtr, 000, dlrs vs, cts, 05 dlrs...</td>\n",
       "      <td>[WESTINGHOUSE ELECTRIC CORP &amp;lt;WX&gt; 1ST QTR NE...</td>\n",
       "      <td>vs - net - shr - qtr - 000 - dlrs vs - cts - 0...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>996</th>\n",
       "      <td>WALL STREET STOCKS/BROWNING FERRIS &amp;lt;BFI&gt;  T...</td>\n",
       "      <td>63</td>\n",
       "      <td>finance stocks</td>\n",
       "      <td>[steel, fleet, sees, suit, recommend, stock, s...</td>\n",
       "      <td>[WALL STREET STOCKS/BROWNING FERRIS &amp;lt;BFI&gt;  ...</td>\n",
       "      <td>steel - fleet - sees - suit - recommend - stoc...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>997</th>\n",
       "      <td>HOMESTAKE &amp;lt;HM&gt; MULLS BUYING ORE RESERVES  H...</td>\n",
       "      <td>32</td>\n",
       "      <td>mining gold</td>\n",
       "      <td>[gold, mining, tons, 000 tons, ounces, explora...</td>\n",
       "      <td>[GORDEX MINERALS LOCATES CANADA GOLD DEPOSITS ...</td>\n",
       "      <td>gold - mining - tons - 000 tons - ounces - exp...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>998</th>\n",
       "      <td>CHRONAR CORP &amp;lt;CRNR.O&gt; YEAR LOSS  Shr loss 9...</td>\n",
       "      <td>15</td>\n",
       "      <td>loss revenue</td>\n",
       "      <td>[loss, 000 dlrs, 000, dlrs, vs profit, quarter...</td>\n",
       "      <td>[GENERAL INSTRUMENT CORP &amp;lt;GRL&gt; 4TH QTR LOSS...</td>\n",
       "      <td>loss - 000 dlrs - 000 - dlrs - vs profit - qua...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>999</th>\n",
       "      <td>EMPIRE &amp;lt;EMPA.TO&gt; TO BUY SOBEYS &amp;lt;SYSA.TO&gt;...</td>\n",
       "      <td>1</td>\n",
       "      <td>acquisitions ownership</td>\n",
       "      <td>[stock, acquired, common, purolator, shares, a...</td>\n",
       "      <td>[E.F. HUTTON &amp;lt;EFH&gt; UNIT'S PUROLATOR OFFER E...</td>\n",
       "      <td>stock - acquired - common - purolator - shares...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1000 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                              Document  Topic  \\\n",
       "0    ASIAN EXPORTERS FEAR DAMAGE FROM U.S.-JAPAN RI...     21   \n",
       "1    CHINA DAILY SAYS VERMIN EAT 7-12 PCT GRAIN STO...     18   \n",
       "2    JAPAN TO REVISE LONG-TERM ENERGY DEMAND DOWNWA...     97   \n",
       "3    THAI TRADE DEFICIT WIDENS IN FIRST QUARTER  Th...      3   \n",
       "4    INDONESIA SEES CPO PRICE RISING SHARPLY  Indon...      4   \n",
       "..                                                 ...    ...   \n",
       "995  HONEYWELL INC &lt;HON> 1ST QTR OPER NET  Oper ...     20   \n",
       "996  WALL STREET STOCKS/BROWNING FERRIS &lt;BFI>  T...     63   \n",
       "997  HOMESTAKE &lt;HM> MULLS BUYING ORE RESERVES  H...     32   \n",
       "998  CHRONAR CORP &lt;CRNR.O> YEAR LOSS  Shr loss 9...     15   \n",
       "999  EMPIRE &lt;EMPA.TO> TO BUY SOBEYS &lt;SYSA.TO>...      1   \n",
       "\n",
       "                        Name  \\\n",
       "0              exports trade   \n",
       "1    commodities agriculture   \n",
       "2                     energy   \n",
       "3        agriculture exports   \n",
       "4           commodity prices   \n",
       "..                       ...   \n",
       "995         earnings mergers   \n",
       "996           finance stocks   \n",
       "997              mining gold   \n",
       "998             loss revenue   \n",
       "999   acquisitions ownership   \n",
       "\n",
       "                                        Representation  \\\n",
       "0    [ec, trade, steel, japan, coffee, exports, jap...   \n",
       "1    [tonnes, grain, wheat, storage, crop, 000 tonn...   \n",
       "2    [energy, miti, demand, revise, natural, power,...   \n",
       "3    [nil, nil nil, 87, tonnes, 1986 87, 1987 88, s...   \n",
       "4    [prices, futures, price, traders, palm, rise, ...   \n",
       "..                                                 ...   \n",
       "995  [vs, net, shr, qtr, 000, dlrs vs, cts, 05 dlrs...   \n",
       "996  [steel, fleet, sees, suit, recommend, stock, s...   \n",
       "997  [gold, mining, tons, 000 tons, ounces, explora...   \n",
       "998  [loss, 000 dlrs, 000, dlrs, vs profit, quarter...   \n",
       "999  [stock, acquired, common, purolator, shares, a...   \n",
       "\n",
       "                                   Representative_Docs  \\\n",
       "0    [ASIAN EXPORTERS FEAR DAMAGE FROM U.S.-JAPAN R...   \n",
       "1    [BRAZIL GRAIN HARVEST FACES STORAGE PROBLEMS  ...   \n",
       "2    [JAPAN TO REVISE LONG-TERM ENERGY DEMAND DOWNW...   \n",
       "3    [SOYBEAN SUPPLY/DEMAND BY COUNTRY -- USDA  The...   \n",
       "4    [U.K. PRODUCER PRICES SEEN MOVED BY TECHNICALI...   \n",
       "..                                                 ...   \n",
       "995  [WESTINGHOUSE ELECTRIC CORP &lt;WX> 1ST QTR NE...   \n",
       "996  [WALL STREET STOCKS/BROWNING FERRIS &lt;BFI>  ...   \n",
       "997  [GORDEX MINERALS LOCATES CANADA GOLD DEPOSITS ...   \n",
       "998  [GENERAL INSTRUMENT CORP &lt;GRL> 4TH QTR LOSS...   \n",
       "999  [E.F. HUTTON &lt;EFH> UNIT'S PUROLATOR OFFER E...   \n",
       "\n",
       "                                           Top_n_words  \\\n",
       "0    ec - trade - steel - japan - coffee - exports ...   \n",
       "1    tonnes - grain - wheat - storage - crop - 000 ...   \n",
       "2    energy - miti - demand - revise - natural - po...   \n",
       "3    nil - nil nil - 87 - tonnes - 1986 87 - 1987 8...   \n",
       "4    prices - futures - price - traders - palm - ri...   \n",
       "..                                                 ...   \n",
       "995  vs - net - shr - qtr - 000 - dlrs vs - cts - 0...   \n",
       "996  steel - fleet - sees - suit - recommend - stoc...   \n",
       "997  gold - mining - tons - 000 tons - ounces - exp...   \n",
       "998  loss - 000 dlrs - 000 - dlrs - vs profit - qua...   \n",
       "999  stock - acquired - common - purolator - shares...   \n",
       "\n",
       "     Representative_document  \n",
       "0                       True  \n",
       "1                      False  \n",
       "2                       True  \n",
       "3                      False  \n",
       "4                      False  \n",
       "..                       ...  \n",
       "995                    False  \n",
       "996                     True  \n",
       "997                     True  \n",
       "998                    False  \n",
       "999                    False  \n",
       "\n",
       "[1000 rows x 7 columns]"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display df as the cell result. NOTE(review): df is defined outside this cell; confirm which frame is live before relying on the saved output.\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "International Trade and Exports\n",
      "trade\n",
      "Agricultural Production and Disease\n",
      "grain\n",
      "Energy market and prices\n",
      "nat-gas\n",
      "Agricultural Exports\n",
      "corn\n",
      "OPEC oil production and prices\n",
      "veg-oil\n",
      "Maritime Accidents and Safety\n",
      "ship\n",
      "Coffee trade regulations\n",
      "coffee\n",
      "International trade and agricultural policies\n",
      "wheat\n",
      "Gold Mining Investment and Production\n",
      "gold\n",
      "Corporate Finance: Mergers & Acquisitions\n",
      "acq\n",
      "Trade Agreements and Negotiations\n",
      "tin\n",
      "Financial instruments and interest rates\n",
      "interest\n",
      "Mining and Gold Reserves\n",
      "copper\n",
      "Economic growth and industrial production\n",
      "ipi\n",
      "Global Agricultural Markets\n",
      "livestock\n",
      "Dividend Finance\n",
      "earn\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Central banking and monetary policy\n",
      "money-fx\n",
      "Trade Surplus and Economic Balance\n",
      "trade\n",
      "Trading commodities: future\n",
      "lead\n",
      "Gold Mining Investment and Production\n",
      "acq\n",
      "Economic growth and industrial production\n",
      "jobs\n",
      "Gold Mining Investment and Production\n",
      "earn\n",
      "Financial Regulation\n",
      "earn\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Dividend Finance\n",
      "earn\n",
      "Trade Agreements and Negotiations\n",
      "tin\n",
      "Trade Policy and Protectionism\n",
      "trade\n",
      "Agricultural Exports\n",
      "zinc\n",
      "Sugar Trade and Exports\n",
      "sugar\n",
      "Sugar Trade and Exports\n",
      "sugar\n",
      "Food and Beverage Mergers and Acquisitions\n",
      "acq\n",
      "Financial aspects of currency and economy\n",
      "money-fx\n",
      "Oil prices and their impact on the energy industry\n",
      "cpi\n",
      "OPEC oil production\n",
      "oilseed\n",
      "Corporate actions: stock splits and earnings\n",
      "earn\n",
      "Corporate ownership and mergers\n",
      "acq\n",
      "Dividend Finance\n",
      "earn\n",
      "Trade Policy and Legislation\n",
      "trade\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Japanese Shipbuilding Industry\n",
      "acq\n",
      "Corporate actions: stock splits and dividends\n",
      "earn\n",
      "Trade Policy and Protectionism\n",
      "trade\n",
      "Financial aspects of currency and economy\n",
      "money-fx\n",
      "Economic Inflation\n",
      "cpi\n",
      "Central banking and monetary policy\n",
      "money-fx\n",
      "Corporate Legal Disputes\n",
      "acq\n",
      "Sugar production and profitability in agriculture\n",
      "sugar\n",
      "Sugar Trade and Exports\n",
      "sugar\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Trading commodities: future\n",
      "nickel\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Economic concerns: trade deficits and their impact\n",
      "money-fx\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Dividend Record and Earnings\n",
      "earn\n",
      "Pharmaceutical industry mergers and acquisitions\n",
      "acq\n",
      "Technology Mergers & Acquisitions\n",
      "acq\n",
      "Technology Mergers & Acquisitions\n",
      "acq\n",
      "Agricultural Exports\n",
      "groundnut\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Maritime Accidents and Safety\n",
      "ship\n",
      "Food and Beverage Mergers and Acquisitions\n",
      "earn\n",
      "Labor disputes in various industries\n",
      "ship\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Central banking and monetary policy\n",
      "money-fx\n",
      "Corporate ownership and mergers\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Unemployment and Economic Statistics\n",
      "jobs\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "International trade agreements and pricing\n",
      "acq\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Corporate actions: stock splits and earnings\n",
      "acq\n",
      "Financial Performance and Earnings\n",
      "earn\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Financial Markets and Trading\n",
      "earn\n",
      "Sugar Trade and Exports\n",
      "sugar\n",
      "International Economic Relations\n",
      "money-fx\n",
      "Sugar Trade and Exports\n",
      "sugar\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Gold Mining Investment and Production\n",
      "earn\n",
      "Food and Beverage Mergers and Acquisitions\n",
      "acq\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Oil and gas industry mergers and acquisitions\n",
      "acq\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Dividend Finance\n",
      "earn\n",
      "Dividend Finance\n",
      "earn\n",
      "Sugar production and profitability in agriculture\n",
      "sugar\n",
      "Corporate Mergers & Governance\n",
      "earn\n",
      "Sugar Trade and Exports\n",
      "sugar\n",
      "Dividend Finance\n",
      "earn\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Pharmaceutical industry mergers and acquisitions\n",
      "acq\n",
      "Financial performance and loan losses\n",
      "earn\n",
      "Pharmaceutical industry mergers and acquisitions\n",
      "earn\n",
      "Corporate actions: stock splits and earnings\n",
      "earn\n",
      "Corporate actions: stock splits and earnings\n",
      "earn\n",
      "Corporate Mergers and Acquisitions\n",
      "acq\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Government Subsidies in Agricultural Trade\n",
      "corn\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Oil prices and their impact on the energy industry\n",
      "heat\n",
      "Seasonal Agricultural Impacts on Weather\n",
      "wheat\n",
      "Unemployment and Economic Statistics\n",
      "jobs\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Central banking and monetary policy\n",
      "money-fx\n",
      "Federal Reserve and Economic Reserves\n",
      "interest\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Coffee market trends and pricing strategies\n",
      "earn\n",
      "Dividend Finance\n",
      "earn\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Pharmaceutical industry mergers and acquisitions\n",
      "acq\n",
      "Corporate ownership and mergers\n",
      "acq\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Oil prices and their impact on the energy industry\n",
      "crude\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Food and Beverage Mergers and Acquisitions\n",
      "acq\n",
      "Technology Mergers & Acquisitions\n",
      "earn\n",
      "Financial sector consolidation\n",
      "earn\n",
      "Dividend Finance\n",
      "earn\n",
      "Dividend Record and Earnings\n",
      "earn\n",
      "Technology Mergers & Acquisitions\n",
      "earn\n",
      "Dividend records in finance\n",
      "earn\n",
      "Corporate actions: stock splits and earnings\n",
      "earn\n",
      "Corporate Mergers and Acquisitions\n",
      "acq\n",
      "Federal Reserve and Economic Reserves\n",
      "interest\n",
      "Aviation Financing and Technology\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Cocoa trade agreements and markets\n",
      "cocoa\n",
      "Federal Reserve and Economic Reserves\n",
      "interest\n",
      "Agricultural Trade Policy\n",
      "wheat\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Agricultural commodity markets and trade\n",
      "corn\n",
      "Aviation Financing and Technology\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Corporate actions: stock splits and earnings\n",
      "earn\n",
      "Technology Mergers & Acquisitions\n",
      "acq\n",
      "Trading commodities: future\n",
      "tin\n",
      "Dividend Record and Earnings\n",
      "earn\n",
      "Corporate finance and legal aspects of mergers and acquisitions\n",
      "acq\n",
      "Corporate actions: stock splits and earnings\n",
      "earn\n",
      "Pharmaceutical industry mergers and acquisitions\n",
      "earn\n",
      "Technology Mergers & Acquisitions\n",
      "acq\n",
      "Corporate finance and acquisitions\n",
      "acq\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Technology Mergers & Acquisitions\n",
      "acq\n",
      "Soybean trade and regulation\n",
      "oilseed\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Agricultural Exports\n",
      "wheat\n",
      "Aviation Financing and Technology\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Corporate actions: stock splits and earnings\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Pharmaceutical industry mergers and acquisitions\n",
      "acq\n",
      "Corporate Mergers and Acquisitions\n",
      "acq\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Trade Policy and Legislation\n",
      "trade\n",
      "Corporate restructuring and divestiture\n",
      "acq\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Economic Sanctions and Trade Restrictions in South Africa\n",
      "trade\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Cocoa trade agreements and markets\n",
      "cocoa\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Corporate earnings and dividends\n",
      "earn\n",
      "Dividend Finance\n",
      "earn\n",
      "Food and Beverage Mergers and Acquisitions\n",
      "earn\n",
      "Dividend Record and Earnings\n",
      "earn\n",
      "Coffee trade regulations\n",
      "coffee\n",
      "Trade Agreements and Negotiations\n",
      "crude\n",
      "Pharmaceutical industry mergers and acquisitions\n",
      "acq\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "International trade and agricultural policies\n",
      "rice\n",
      "Corporate financial performance\n",
      "earn\n",
      "Dividend Record and Earnings\n",
      "earn\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Central banking and monetary policy\n",
      "money-fx\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Corporate actions: stock splits and dividends\n",
      "acq\n",
      "Trade Agreements and Negotiations\n",
      "tin\n",
      "Agricultural Exports\n",
      "cotton\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Financial Regulation\n",
      "earn\n",
      "Trade Policy and Legislation\n",
      "trade\n",
      "Aviation Financing and Technology\n",
      "earn\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "OPEC oil production\n",
      "crude\n",
      "Dividend Finance\n",
      "earn\n",
      "Central banking and monetary policy\n",
      "money-fx\n",
      "Government regulations on agriculture\n",
      "livestock\n",
      "Oil and gas industry mergers and acquisitions\n",
      "crude\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Federal Reserve and Economic Reserves\n",
      "money-supply\n",
      "Corporate Leadership Changes\n",
      "earn\n",
      "OPEC oil production\n",
      "crude\n",
      "Economic Inflation\n",
      "money-supply\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Energy industry earnings and writedowns\n",
      "earn\n",
      "International Economics and Finance\n",
      "money-fx\n",
      "Meat industry and trade\n",
      "livestock\n",
      "Oil and gas industry mergers and acquisitions\n",
      "acq\n",
      "Dividend Record and Earnings\n",
      "earn\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "International Trade and Exports\n",
      "trade\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Technology Mergers & Acquisitions\n",
      "copper\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Cocoa trade agreements and markets\n",
      "grain\n",
      "Agricultural commodity markets and trade\n",
      "wheat\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Corporate actions: stock splits and dividends\n",
      "earn\n",
      "Corporate actions: stock splits and dividends\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Dividend Record and Earnings\n",
      "earn\n",
      "Agricultural Exports\n",
      "corn\n",
      "Agricultural Exports\n",
      "livestock\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Aviation industry consolidation\n",
      "acq\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Dividend Finance\n",
      "earn\n",
      "Food and Beverage Mergers and Acquisitions\n",
      "acq\n",
      "Government regulations on agriculture\n",
      "grain\n",
      "Coffee trade regulations\n",
      "acq\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Sugar Trade and Exports\n",
      "acq\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Monetary Policy and Interest Rates\n",
      "interest\n",
      "Technology Mergers & Acquisitions\n",
      "acq\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "International Trade and Exports\n",
      "trade\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Oil prices and their impact on the energy industry\n",
      "nat-gas\n",
      "Financial Management and Debt\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Coffee market trends and pricing strategies\n",
      "coffee\n",
      "Corporate actions: stock splits and earnings\n",
      "acq\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Technology Mergers & Acquisitions\n",
      "acq\n",
      "OPEC oil production\n",
      "crude\n",
      "Agricultural commodity markets and trade\n",
      "wheat\n",
      "OPEC oil production\n",
      "crude\n",
      "Financial performance and loan losses\n",
      "earn\n",
      "Technology Mergers & Acquisitions\n",
      "earn\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Oil prices and their impact on the energy industry\n",
      "gas\n",
      "Economic concerns: trade deficits and their impact\n",
      "trade\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Financial performance and loan losses\n",
      "earn\n",
      "Coffee trade regulations\n",
      "coffee\n",
      "Dividend Finance\n",
      "earn\n",
      "Pharmaceutical industry mergers and acquisitions\n",
      "acq\n",
      "Central banking and monetary policy\n",
      "money-fx\n",
      "Unemployment and Economic Statistics\n",
      "jobs\n",
      "Oil and gas industry mergers and acquisitions\n",
      "acq\n",
      "International trade and agricultural policies\n",
      "rice\n",
      "Agricultural Exports\n",
      "alum\n",
      "International Economic Relations\n",
      "trade\n",
      "Global Economic Trade\n",
      "trade\n",
      "Central banking and monetary policy\n",
      "money-fx\n",
      "Trading commodities: future\n",
      "earn\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "OPEC oil production and prices\n",
      "earn\n",
      "Central banking and monetary policy\n",
      "interest\n",
      "Economic concerns: trade deficits and their impact\n",
      "trade\n",
      "Oil prices and their impact on the energy industry\n",
      "earn\n",
      "Agricultural Production and Disease\n",
      "wheat\n",
      "Trading commodities: future\n",
      "earn\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Sugar Trade and Exports\n",
      "sugar\n",
      "Oil and gas industry mergers and acquisitions\n",
      "earn\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Monetary Policy and Currency\n",
      "yen\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Aviation Financing and Technology\n",
      "rubber\n",
      "Economic aspects of interest rates\n",
      "interest\n",
      "Mining and Gold Reserves\n",
      "gold\n",
      "Corporate actions: stock splits and dividends\n",
      "earn\n",
      "Sugar Trade and Exports\n",
      "sugar\n",
      "OPEC quota and oil production\n",
      "nat-gas\n",
      "Corporate actions: stock splits and earnings\n",
      "earn\n",
      "Trading commodities: future\n",
      "strategic-metal\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "International Trade\n",
      "rubber\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Trade Surplus and Economic Balance\n",
      "trade\n",
      "Dividend Finance\n",
      "earn\n",
      "Trade Surplus and Economic Balance\n",
      "trade\n",
      "Financial aspects of currency and economy\n",
      "money-fx\n",
      "Monetary Policy and Currency\n",
      "yen\n",
      "Central banking and monetary policy\n",
      "money-fx\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "reserves\n",
      "Monetary Policy and Currency\n",
      "money-fx\n",
      "Financial instruments and interest rates\n",
      "interest\n",
      "Sugar Trade and Exports\n",
      "sugar\n",
      "International trade agreements and pricing\n",
      "trade\n",
      "Financial instruments and interest rates\n",
      "money-fx\n",
      "Monetary Policy and Currency\n",
      "money-fx\n",
      "Monetary Policy and Currency\n",
      "money-fx\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Global Economic Trade\n",
      "trade\n",
      "Monetary Policy and Currency\n",
      "money-fx\n",
      "Monetary Policy and Currency\n",
      "yen\n",
      "Monetary Policy and Currency\n",
      "yen\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Monetary Policy and Currency\n",
      "money-fx\n",
      "Sugar production and profitability in agriculture\n",
      "sugar\n",
      "Corporate actions: stock splits and dividends\n",
      "acq\n",
      "Financial sector consolidation\n",
      "acq\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Monetary Policy and Currency\n",
      "gold\n",
      "International trade and agricultural policies\n",
      "wheat\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Corporate finance and acquisitions\n",
      "acq\n",
      "Monetary Policy and Currency\n",
      "gold\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Monetary Policy and Currency\n",
      "yen\n",
      "Labor disputes in various industries\n",
      "ship\n",
      "Unemployment and Economic Statistics\n",
      "jobs\n",
      "Corporate ownership and mergers\n",
      "acq\n",
      "Corporate Finance: Mergers & Acquisitions\n",
      "earn\n",
      "International Economics and Finance\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Dividend records in finance\n",
      "earn\n",
      "Agricultural Exports\n",
      "wheat\n",
      "Corporate finance and acquisitions\n",
      "acq\n",
      "Monetary Policy and Currency\n",
      "yen\n",
      "Agricultural commodity markets and trade\n",
      "livestock\n",
      "Economic concerns: trade deficits and their impact\n",
      "money-fx\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Trading commodities: future\n",
      "crude\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Central banking and monetary policy\n",
      "money-fx\n",
      "Central banking and monetary policy\n",
      "money-fx\n",
      "Monetary Policy and Currency\n",
      "money-fx\n",
      "Dividend Finance\n",
      "earn\n",
      "Logistics and Transportation of Shipped Goods\n",
      "ship\n",
      "Meat industry and trade\n",
      "livestock\n",
      "Technology Mergers & Acquisitions\n",
      "earn\n",
      "Seasonal Agricultural Impacts on Weather\n",
      "oilseed\n",
      "Central banking and monetary policy\n",
      "money-fx\n",
      "Seasonal Agricultural Impacts on Weather\n",
      "coffee\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Economic Policy and Central Banks\n",
      "bop\n",
      "Corporate Mergers and Acquisitions\n",
      "acq\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Dividend Record and Earnings\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Monetary Policy and Currency\n",
      "yen\n",
      "Financial instruments and interest rates\n",
      "interest\n",
      "Economic concerns: trade deficits and their impact\n",
      "gnp\n",
      "Economic Inflation\n",
      "money-supply\n",
      "Corporate actions: stock splits and earnings\n",
      "earn\n",
      "Trading commodities: future\n",
      "lead\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Corporate actions: stock splits and earnings\n",
      "earn\n",
      "Financial instruments and interest rates\n",
      "interest\n",
      "Corporate finance and acquisitions\n",
      "acq\n",
      "Monetary Policy and Currency\n",
      "dlr\n",
      "OPEC oil production and prices\n",
      "lead\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Pharmaceutical industry mergers and acquisitions\n",
      "earn\n",
      "Government policies on agriculture\n",
      "wheat\n",
      "Trading commodities: future\n",
      "cotton\n",
      "Corporate takeovers and share ownership\n",
      "earn\n",
      "Pharmaceutical industry mergers and acquisitions\n",
      "earn\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Agricultural commodity markets and trade\n",
      "wheat\n",
      "Trade Surplus and Economic Balance\n",
      "bop\n",
      "OPEC oil production\n",
      "oilseed\n",
      "OPEC oil production and prices\n",
      "acq\n",
      "Financial instruments and interest rates\n",
      "interest\n",
      "Financial instruments and interest rates\n",
      "interest\n",
      "Seasonal Agricultural Impacts on Weather\n",
      "grain\n",
      "Financial regulatory compliance and fraud\n",
      "earn\n",
      "Agricultural Exports\n",
      "wheat\n",
      "Retail sales and economic growth\n",
      "earn\n",
      "Coffee market trends and pricing strategies\n",
      "coffee\n",
      "Corporate actions: stock splits and earnings\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Technology Mergers & Acquisitions\n",
      "acq\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Dividend Record and Earnings\n",
      "earn\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Economic aspects of interest rates\n",
      "interest\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Oil prices and their impact on the energy industry\n",
      "heat\n",
      "Financial instruments and interest rates\n",
      "interest\n",
      "Oil and gas industry mergers and acquisitions\n",
      "earn\n",
      "Federal Reserve and Economic Reserves\n",
      "money-fx\n",
      "Trading commodities: future\n",
      "acq\n",
      "Monetary policy and interest rates\n",
      "interest\n",
      "Agricultural commodity markets and trade\n",
      "wheat\n",
      "Trading commodities: future\n",
      "potato\n",
      "Monetary Policy and Currency\n",
      "money-fx\n",
      "Corporate Governance and Mergers & Acquisitions\n",
      "iron-steel\n",
      "Trading commodities: future\n",
      "tin\n",
      "Monetary Policy and Currency\n",
      "money-fx\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Pharmaceutical industry mergers and acquisitions\n",
      "acq\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Aviation Financing and Technology\n",
      "earn\n",
      "Monetary Policy and Currency\n",
      "money-fx\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Financial Markets and Trading\n",
      "iron-steel\n",
      "Pharmaceutical industry mergers and acquisitions\n",
      "earn\n",
      "Pharmaceutical industry mergers and acquisitions\n",
      "earn\n",
      "Meat industry and trade\n",
      "livestock\n",
      "Sugar production and profitability in agriculture\n",
      "cotton\n",
      "Trade Surplus and Economic Balance\n",
      "trade\n",
      "Agricultural Exports\n",
      "corn\n",
      "Agricultural Exports\n",
      "barley\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Cocoa trade agreements and markets\n",
      "cocoa\n",
      "Dividend Record and Earnings\n",
      "earn\n",
      "Economic concerns: trade deficits and their impact\n",
      "money-fx\n",
      "Media industry mergers and acquisitions\n",
      "acq\n",
      "Fiscal policy and government spending\n",
      "trade\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Corporate Mergers & Governance\n",
      "earn\n",
      "Corporate actions: stock splits and earnings\n",
      "acq\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Seasonal Agricultural Impacts on Weather\n",
      "orange\n",
      "Agricultural Exports\n",
      "wheat\n",
      "Financial aspects of currency and economy\n",
      "money-fx\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Soybean trade and regulation\n",
      "soybean\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Monetary Policy and Currency\n",
      "money-fx\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Monetary Policy and Currency\n",
      "money-fx\n",
      "Financial performance and loan losses\n",
      "earn\n",
      "Sugar Trade and Exports\n",
      "sugar\n",
      "Corporate Mergers and Acquisitions\n",
      "acq\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Pharmaceutical industry mergers and acquisitions\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Dividend Record and Earnings\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Logistics and Transportation of Shipped Goods\n",
      "ship\n",
      "Corporate actions: stock splits and dividends\n",
      "acq\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Corporate finance and acquisitions\n",
      "acq\n",
      "Aviation Financing and Technology\n",
      "earn\n",
      "Global Agricultural Markets\n",
      "corn\n",
      "International trade agreements and pricing\n",
      "oilseed\n",
      "Corporate finance and investment\n",
      "earn\n",
      "Coffee trade regulations\n",
      "coffee\n",
      "Logistics and Transportation of Shipped Goods\n",
      "ship\n",
      "Sugar Trade and Exports\n",
      "sugar\n",
      "Logistics and Transportation of Shipped Goods\n",
      "ship\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Corporate takeovers and share ownership\n",
      "earn\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Dividend Finance\n",
      "earn\n",
      "Coffee trade regulations\n",
      "coffee\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Retail sales and economic growth\n",
      "retail\n",
      "International Economics and Finance\n",
      "earn\n",
      "Soybean trade and regulation\n",
      "soy-oil\n",
      "Corporate Finance: Mergers & Acquisitions\n",
      "earn\n",
      "Corporate actions: stock splits and earnings\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Financial performance and loan losses\n",
      "earn\n",
      "OPEC oil production and prices\n",
      "lead\n",
      "Financial instruments and interest rates\n",
      "reserves\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Dividend Finance\n",
      "earn\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Industry regulation and consumer protection\n",
      "acq\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Technology Mergers & Acquisitions\n",
      "earn\n",
      "Trade Surplus and Economic Balance\n",
      "bop\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Meat industry and trade\n",
      "livestock\n",
      "Corporate finance and acquisitions\n",
      "earn\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Mining and Gold Reserves\n",
      "gold\n",
      "Sugar production and profitability in agriculture\n",
      "sugar\n",
      "Central banking and monetary policy\n",
      "interest\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Dividend Finance\n",
      "earn\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Mining and Gold Reserves\n",
      "gold\n",
      "Economic aspects of interest rates\n",
      "interest\n",
      "Trade Agreements and Negotiations\n",
      "tin\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Corporate finance and investment\n",
      "earn\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Oil prices and their impact on the energy industry\n",
      "heat\n",
      "Gold Mining Investment and Production\n",
      "copper\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "wheat\n",
      "Dividend Finance\n",
      "strategic-metal\n",
      "Trade Surplus and Economic Balance\n",
      "trade\n",
      "Financial performance and loan losses\n",
      "earn\n",
      "International trade and agricultural policies\n",
      "grain\n",
      "International trade and agricultural policies\n",
      "grain\n",
      "Agricultural commodity markets and trade\n",
      "wheat\n",
      "Agricultural market trends and policies\n",
      "orange\n",
      "Agricultural market trends and policies\n",
      "orange\n",
      "Corporate actions: stock splits and earnings\n",
      "earn\n",
      "Corporate ownership and mergers\n",
      "earn\n",
      "Agricultural market trends and policies\n",
      "wheat\n",
      "Agricultural market trends and policies\n",
      "orange\n",
      "Sugar production and profitability in agriculture\n",
      "orange\n",
      "International trade and agricultural policies\n",
      "corn\n",
      "Agricultural market trends and policies\n",
      "potato\n",
      "Government regulations on agriculture\n",
      "potato\n",
      "Government Payments in Agriculture\n",
      "wheat\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "International trade and agricultural policies\n",
      "wheat\n",
      "Agricultural Exports\n",
      "wheat\n",
      "Soybean trade and regulation\n",
      "soybean\n",
      "Agricultural market trends and policies\n",
      "wheat\n",
      "Corporate actions: stock splits and dividends\n",
      "earn\n",
      "Agricultural market trends and policies\n",
      "wheat\n",
      "International trade and agricultural policies\n",
      "wheat\n",
      "Agricultural Exports\n",
      "wheat\n",
      "Agricultural Exports\n",
      "corn\n",
      "Dividend Finance\n",
      "strategic-metal\n",
      "Agricultural market trends and policies\n",
      "wheat\n",
      "Agricultural commodity markets and trade\n",
      "wheat\n",
      "Agricultural market trends and policies\n",
      "cotton\n",
      "Agricultural Exports\n",
      "corn\n",
      "Corporate actions: stock splits and dividends\n",
      "earn\n",
      "Food and Beverage Mergers and Acquisitions\n",
      "acq\n",
      "Media industry mergers and acquisitions\n",
      "earn\n",
      "Pharmaceutical industry mergers and acquisitions\n",
      "acq\n",
      "Technology Mergers & Acquisitions\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Corporate financial performance\n",
      "earn\n",
      "Agricultural Exports\n",
      "wheat\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Agricultural commodity markets and trade\n",
      "wheat\n",
      "Corporate actions: stock splits and earnings\n",
      "acq\n",
      "Agricultural Exports\n",
      "wheat\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Dividend records in finance\n",
      "earn\n",
      "Corporate Mergers and Acquisitions\n",
      "acq\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Financial writedowns and earnings\n",
      "earn\n",
      "Soybean trade and regulation\n",
      "soy-meal\n",
      "Agricultural Exports\n",
      "corn\n",
      "Agricultural Exports\n",
      "wheat\n",
      "Corporate takeovers and share ownership\n",
      "earn\n",
      "Corporate takeovers and share ownership\n",
      "earn\n",
      "Agricultural Exports\n",
      "corn\n",
      "Agricultural Exports\n",
      "wheat\n",
      "Corporate finance and acquisitions\n",
      "acq\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Agricultural commodity markets and trade\n",
      "grain\n",
      "Agricultural Exports\n",
      "wheat\n",
      "Agricultural market trends and policies\n",
      "wheat\n",
      "Food and Beverage Mergers and Acquisitions\n",
      "acq\n",
      "Food and Beverage Mergers and Acquisitions\n",
      "acq\n",
      "Soybean trade and regulation\n",
      "soybean\n",
      "Soybean trade and regulation\n",
      "soy-meal\n",
      "Soybean trade and regulation\n",
      "soy-oil\n",
      "Oil prices and their impact on the energy industry\n",
      "fuel\n",
      "Agricultural Exports\n",
      "cotton\n",
      "Agricultural Exports\n",
      "wheat\n",
      "Agricultural Exports\n",
      "rice\n",
      "Soybean trade and regulation\n",
      "orange\n",
      "Dividend records in finance\n",
      "earn\n",
      "Natural Disasters and Oil Infrastructure\n",
      "copper\n",
      "Financial writedowns and earnings\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Corporate ownership and mergers\n",
      "earn\n",
      "Oil prices and their impact on the energy industry\n",
      "crude\n",
      "Logistics and Transportation of Shipped Goods\n",
      "ship\n",
      "Energy industry earnings and writedowns\n",
      "earn\n",
      "Global Agricultural Markets\n",
      "livestock\n",
      "Financial performance and loan losses\n",
      "money-supply\n",
      "Financial instruments and interest rates\n",
      "money-supply\n",
      "Agricultural market trends and policies\n",
      "wheat\n",
      "Soybean trade and regulation\n",
      "soybean\n",
      "Financial Services and Institutions\n",
      "acq\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Logistics and Transportation of Shipped Goods\n",
      "orange\n",
      "Corporate ownership and mergers\n",
      "acq\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Oil and gas industry mergers and acquisitions\n",
      "crude\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Corporate finance and acquisitions\n",
      "acq\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Corporate Governance and Mergers & Acquisitions\n",
      "acq\n",
      "Global Agricultural Markets\n",
      "cotton\n",
      "Agricultural Exports\n",
      "oilseed\n",
      "Federal Reserve and Economic Reserves\n",
      "money-supply\n",
      "Financial instruments and interest rates\n",
      "money-supply\n",
      "Federal Reserve and Economic Reserves\n",
      "money-supply\n",
      "Energy industry earnings and writedowns\n",
      "earn\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Federal Reserve and Economic Reserves\n",
      "money-supply\n",
      "Gold Mining Investment and Production\n",
      "copper\n",
      "Federal Reserve and Economic Reserves\n",
      "money-supply\n",
      "Corporate Mergers and Acquisitions\n",
      "acq\n",
      "Media industry mergers and acquisitions\n",
      "acq\n",
      "Corporate finance and investment\n",
      "money-supply\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Agricultural Exports\n",
      "corn\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Dividend Record and Earnings\n",
      "earn\n",
      "Monetary Policy and Currency\n",
      "money-fx\n",
      "OPEC oil production and prices\n",
      "crude\n",
      "Economic Indicators and Inflation\n",
      "money-fx\n",
      "Oil and gas industry mergers and acquisitions\n",
      "nat-gas\n",
      "Trading commodities: future\n",
      "money-fx\n",
      "Agricultural commodity markets and trade\n",
      "corn\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "ship\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Energy industry earnings and writedowns\n",
      "earn\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Corporate finance and acquisitions\n",
      "earn\n",
      "Corporate finance and investment\n",
      "acq\n",
      "Pharmaceutical industry mergers and acquisitions\n",
      "acq\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Technology Mergers & Acquisitions\n",
      "acq\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Telecom earnings quarterly results\n",
      "ship\n",
      "Technology Mergers & Acquisitions\n",
      "acq\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Trade Surplus and Economic Balance\n",
      "trade\n",
      "Monetary policy and interest rates\n",
      "interest\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Financial instruments and interest rates\n",
      "money-supply\n",
      "Financial aspects of currency and economy\n",
      "money-fx\n",
      "Financial aspects of currency and economy\n",
      "money-fx\n",
      "Financial aspects of currency and economy\n",
      "money-fx\n",
      "Financial aspects of currency and economy\n",
      "money-fx\n",
      "Agricultural commodity markets and trade\n",
      "wheat\n",
      "Economic concerns: trade deficits and their impact\n",
      "money-fx\n",
      "Economic aspects of interest rates\n",
      "interest\n",
      "Monetary Policy and Currency\n",
      "interest\n",
      "Labor disputes in various industries\n",
      "ship\n",
      "Natural Disasters and Oil Infrastructure\n",
      "crude\n",
      "Agricultural Exports\n",
      "rice\n",
      "Soybean trade and regulation\n",
      "soy-oil\n",
      "Monetary policy and interest rates\n",
      "interest\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Trade Policy and Protectionism\n",
      "trade\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Sugar Trade and Exports\n",
      "sugar\n",
      "Trade Policy and Legislation\n",
      "trade\n",
      "Financial Regulation\n",
      "trade\n",
      "Financial instruments and interest rates\n",
      "interest\n",
      "Coffee trade regulations\n",
      "coffee\n",
      "Cocoa trade agreements and markets\n",
      "cocoa\n",
      "Agricultural Production and Disease\n",
      "corn\n",
      "Unemployment and Economic Statistics\n",
      "jobs\n",
      "Economic concerns: trade deficits and their impact\n",
      "cpi\n",
      "Financial aspects of currency and economy\n",
      "money-fx\n",
      "Agricultural Exports\n",
      "cotton\n",
      "Fiscal policy and government spending\n",
      "cpi\n",
      "Oil prices and their impact on the energy industry\n",
      "copper\n",
      "Financial aspects of currency and economy\n",
      "money-fx\n",
      "Corporate Legal Disputes\n",
      "earn\n",
      "Coffee market trends and pricing strategies\n",
      "coffee\n",
      "Gold Mining Investment and Production\n",
      "acq\n",
      "Financial aspects of currency and economy\n",
      "income\n",
      "Dividend Finance\n",
      "earn\n",
      "Financial instruments and interest rates\n",
      "interest\n",
      "Gold Mining Investment and Production\n",
      "iron-steel\n",
      "Gold Mining Investment and Production\n",
      "copper\n",
      "International Trade and Exports\n",
      "trade\n",
      "International Trade and Exports\n",
      "trade\n",
      "Corporate restructuring and divestiture\n",
      "acq\n",
      "Central banking and monetary policy\n",
      "money-fx\n",
      "Economic Indicators and Inflation\n",
      "cpi\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Economic concerns: trade deficits and their impact\n",
      "trade\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Corporate financial performance\n",
      "earn\n",
      "Economic growth and industrial production\n",
      "ipi\n",
      "Monetary Policy and Currency\n",
      "money-fx\n",
      "Agricultural market trends and policies\n",
      "wheat\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Monetary Policy and Currency\n",
      "meal-feed\n",
      "Agricultural commodity markets and trade\n",
      "corn\n",
      "Cocoa trade agreements and markets\n",
      "cocoa\n",
      "Financial instruments and interest rates\n",
      "gold\n",
      "Central banking and monetary policy\n",
      "interest\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Trading commodities: future\n",
      "acq\n",
      "Economic growth and industrial production\n",
      "cpi\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Financial instruments and interest rates\n",
      "interest\n",
      "Gold Mining Investment and Production\n",
      "gold\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "OPEC oil production and prices\n",
      "wpi\n",
      "Oil and gas industry mergers and acquisitions\n",
      "nat-gas\n",
      "Corporate actions: stock splits and earnings\n",
      "earn\n",
      "Tax Reform and Foreign Aid\n",
      "gnp\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Corporate actions: stock splits and earnings\n",
      "earn\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Financial aspects of currency and economy\n",
      "money-fx\n",
      "International trade agreements and pricing\n",
      "alum\n",
      "Financial instruments and interest rates\n",
      "interest\n",
      "Sugar Trade and Exports\n",
      "sugar\n",
      "Central banking and monetary policy\n",
      "money-fx\n",
      "Financial aspects of currency and economy\n",
      "money-fx\n",
      "Economic concerns: trade deficits and their impact\n",
      "trade\n",
      "Global Agricultural Markets\n",
      "palm-oil\n",
      "Trading commodities: future\n",
      "zinc\n",
      "Economic Indicators and Inflation\n",
      "wpi\n",
      "Economic concerns: trade deficits and their impact\n",
      "gnp\n",
      "Economic Indicators and Inflation\n",
      "gnp\n",
      "Monetary Policy and Currency\n",
      "dlr\n",
      "Business and Economic Growth\n",
      "gnp\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Technology Mergers & Acquisitions\n",
      "acq\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Media industry mergers and acquisitions\n",
      "acq\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Mining and Gold Reserves\n",
      "gold\n",
      "International trade agreements and pricing\n",
      "money-fx\n",
      "Monetary Policy and Interest Rates\n",
      "interest\n",
      "Corporate restructuring and divestiture\n",
      "acq\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Financial Markets and Trading\n",
      "acq\n",
      "Coffee trade regulations\n",
      "tea\n",
      "Economic Stability and Growth\n",
      "cpi\n",
      "Corporate Mergers & Governance\n",
      "earn\n",
      "Pharmaceutical industry mergers and acquisitions\n",
      "earn\n",
      "Mining and Gold Reserves\n",
      "acq\n",
      "Corporate ownership and mergers\n",
      "acq\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Government Payments in Agriculture\n",
      "livestock\n",
      "Oil and gas industry mergers and acquisitions\n",
      "nat-gas\n",
      "Economic Inflation\n",
      "cpi\n",
      "Pharmaceutical industry mergers and acquisitions\n",
      "acq\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Gold Mining Investment and Production\n",
      "gold\n",
      "Central banking and monetary policy\n",
      "money-supply\n",
      "Monetary Policy and Currency\n",
      "money-fx\n",
      "Financial writedowns and earnings\n",
      "earn\n",
      "Meat industry and trade\n",
      "livestock\n",
      "Economic concerns: trade deficits and their impact\n",
      "trade\n",
      "Trade Policy and Legislation\n",
      "trade\n",
      "Corporate ownership and mergers\n",
      "acq\n",
      "Corporate ownership and mergers\n",
      "earn\n",
      "Corporate actions: stock splits and earnings\n",
      "earn\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Trading commodities: future\n",
      "oilseed\n",
      "Corporate Mergers and Acquisitions\n",
      "earn\n",
      "Economic Inflation\n",
      "cpi\n",
      "Corporate financial performance\n",
      "earn\n",
      "OPEC oil production and prices\n",
      "wpi\n",
      "Agricultural commodity markets and trade\n",
      "wheat\n",
      "Financial instruments and interest rates\n",
      "interest\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Corporate Mergers & Governance\n",
      "acq\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Coffee trade regulations\n",
      "coffee\n",
      "Economic Indicators and Inflation\n",
      "lei\n",
      "Monetary Policy and Currency\n",
      "money-supply\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Monetary Policy and Currency\n",
      "money-fx\n",
      "Mining and Gold Reserves\n",
      "gold\n",
      "Corporate finance and acquisitions\n",
      "acq\n",
      "Corporate actions: stock splits and earnings\n",
      "earn\n",
      "OPEC oil production and prices\n",
      "lead\n",
      "Corporate Mergers & Governance\n",
      "earn\n",
      "Corporate finance and acquisitions\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Monetary Policy and Currency\n",
      "interest\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Corporate actions: stock splits and earnings\n",
      "earn\n",
      "Economic Stability and Growth\n",
      "earn\n",
      "Food and Beverage Mergers and Acquisitions\n",
      "acq\n",
      "Pharmaceutical industry mergers and acquisitions\n",
      "earn\n",
      "Financial instruments and interest rates\n",
      "interest\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Economic Indicators and Inflation\n",
      "lei\n",
      "Financial performance and loan losses\n",
      "earn\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Oil prices and their impact on the energy industry\n",
      "fuel\n",
      "Corporate finance and acquisitions\n",
      "earn\n",
      "Agricultural Exports\n",
      "livestock\n",
      "Technology Mergers & Acquisitions\n",
      "earn\n",
      "OPEC oil production\n",
      "alum\n",
      "Financial performance and loan losses\n",
      "earn\n",
      "Gold Mining Investment and Production\n",
      "alum\n",
      "Corporate Finance: Mergers & Acquisitions\n",
      "earn\n",
      "Sugar Trade and Exports\n",
      "sugar\n",
      "Federal Reserve and Economic Reserves\n",
      "money-fx\n",
      "Pharmaceutical industry mergers and acquisitions\n",
      "acq\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Real Estate and Construction Economy\n",
      "earn\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Corporate finance and investment\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Trade Policy and Legislation\n",
      "trade\n",
      "Seasonal Agricultural Impacts on Weather\n",
      "grain\n",
      "Corporate actions: stock splits and earnings\n",
      "earn\n",
      "OPEC oil production and prices\n",
      "veg-oil\n",
      "Corporate finance and investment\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "International Trade and Diplomacy\n",
      "ship\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Financial Earning Taxation\n",
      "earn\n",
      "Corporate Legal Disputes\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Corporate actions: stock splits and earnings\n",
      "earn\n",
      "Meat industry and trade\n",
      "livestock\n",
      "Corporate finance and investment\n",
      "earn\n",
      "Federal Reserve and Economic Reserves\n",
      "money-fx\n",
      "Financial performance\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Corporate financial performance\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Dividend Finance\n",
      "earn\n",
      "Dividend Finance\n",
      "earn\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Dividend Finance\n",
      "earn\n",
      "Corporate finance and acquisitions\n",
      "acq\n",
      "Financial performance and loan losses\n",
      "earn\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Energy industry earnings and writedowns\n",
      "earn\n",
      "Food and Beverage Mergers and Acquisitions\n",
      "acq\n",
      "Corporate restructuring and divestiture\n",
      "acq\n",
      "Technology Mergers & Acquisitions\n",
      "acq\n",
      "Corporate takeovers and share ownership\n",
      "earn\n",
      "Corporate ownership and mergers\n",
      "acq\n",
      "Financial instruments and interest rates\n",
      "interest\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Technology Mergers & Acquisitions\n",
      "earn\n",
      "Economic Indicators and Inflation\n",
      "cpi\n",
      "Dividend Finance\n",
      "earn\n",
      "Corporate ownership and mergers\n",
      "earn\n",
      "Corporate restructuring through mergers and acquisitions\n",
      "acq\n",
      "Corporate ownership and mergers\n",
      "acq\n",
      "Business Investment and Finance\n",
      "earn\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Oil prices and their impact on the energy industry\n",
      "crude\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Japanese Shipbuilding Industry\n",
      "oilseed\n",
      "Financial performance and loan losses\n",
      "earn\n",
      "Energy industry earnings and writedowns\n",
      "earn\n",
      "OPEC oil production\n",
      "crude\n",
      "Technology Mergers & Acquisitions\n",
      "earn\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Economic concerns: trade deficits and their impact\n",
      "trade\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Corporate actions: stock splits and earnings\n",
      "acq\n",
      "Corporate financial performance\n",
      "earn\n",
      "Technology Mergers & Acquisitions\n",
      "acq\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Corporate takeovers and share ownership\n",
      "acq\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Oil and gas industry mergers and acquisitions\n",
      "acq\n",
      "Technology Mergers & Acquisitions\n",
      "acq\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Government Payments in Agriculture\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Corporate actions: stock splits and earnings\n",
      "acq\n",
      "Dividend Finance\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Technology Mergers & Acquisitions\n",
      "acq\n",
      "Financial Performance and Losses\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Corporate financial performance\n",
      "earn\n",
      "Aviation Financing and Technology\n",
      "earn\n",
      "Corporate actions: stock splits and earnings\n",
      "earn\n",
      "Financial performance and loan losses\n",
      "earn\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Trade Agreements and Negotiations\n",
      "acq\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Sugar Trade and Exports\n",
      "nat-gas\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Economic concerns: trade deficits and their impact\n",
      "trade\n",
      "Corporate Mergers and Acquisitions\n",
      "acq\n",
      "Aviation Financing and Technology\n",
      "acq\n",
      "Corporate finance and acquisitions\n",
      "acq\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Meat industry and trade\n",
      "livestock\n",
      "Corporate finance and acquisitions\n",
      "acq\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Agricultural Exports\n",
      "wheat\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Telecom earnings quarterly results\n",
      "earn\n",
      "Food and Beverage Mergers and Acquisitions\n",
      "acq\n",
      "Financial Earning Taxation\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Corporate Finance: Mergers & Acquisitions\n",
      "earn\n",
      "Financial instruments and interest rates\n",
      "earn\n",
      "Dividend Finance\n",
      "earn\n",
      "Corporate actions: stock splits and earnings\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Dividend Finance\n",
      "earn\n",
      "Pharmaceutical industry mergers and acquisitions\n",
      "acq\n",
      "Corporate earnings and revenue\n",
      "earn\n",
      "Food and Beverage Mergers and Acquisitions\n",
      "acq\n",
      "Meat industry and trade\n",
      "livestock\n",
      "Economic aspects of interest rates\n",
      "interest\n",
      "Dividend records in finance\n",
      "earn\n",
      "Energy industry earnings and writedowns\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "acq\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Corporate ownership and mergers\n",
      "acq\n",
      "OPEC oil production\n",
      "gas\n",
      "Corporate Mergers and Acquisitions\n",
      "acq\n",
      "Central banking and monetary policy\n",
      "earn\n",
      "Corporate finance and acquisitions\n",
      "earn\n",
      "Aviation Financing and Technology\n",
      "earn\n",
      "Corporate actions: stock splits and earnings\n",
      "earn\n",
      "Mining and Gold Reserves\n",
      "gold\n",
      "Earnings from Discontinued Operations\n",
      "earn\n",
      "Earnings from Discontinued Operations\n",
      "acq\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "6"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# For every document position, embed the generated topic name (df['Name'])\n",
    "# and that position's entries in test_set, print the name and the\n",
    "# highest-scoring entry, and count how many best scores reach 0.60.\n",
    "# NOTE(review): util is presumably sentence_transformers.util - confirm\n",
    "# against the import cell.\n",
    "correct = 0\n",
    "n_docs = len(documents)\n",
    "for index in range(n_docs):\n",
    "    topic_name = df['Name'][index]\n",
    "    candidates = test_set[index]\n",
    "    query_embedding = model.encode(topic_name)\n",
    "    passage_embedding = model.encode(candidates)\n",
    "    print(topic_name)\n",
    "    # Row 0 of the score matrix holds this query's score per candidate.\n",
    "    sim_scores = util.dot_score(query_embedding, passage_embedding)[0].numpy()\n",
    "    # argsort is ascending, so the last position is the best candidate.\n",
    "    rank_list = np.argsort(sim_scores)[-1]\n",
    "    best_score = sim_scores[rank_list]\n",
    "    print(candidates[rank_list])\n",
    "    correct += 1 if best_score >= 0.60 else 0\n",
    "correct"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "growth\n",
      "money-supply 0.41968155\n",
      "oil\n",
      "soy-oil 0.72730654\n",
      "takeover\n",
      "lead 0.3945085\n",
      "acquisitions debt\n",
      "instal-debt 0.56405336\n",
      "dividend\n",
      "earn 0.45968884\n",
      "policy currency\n",
      "money-fx 0.5866241\n",
      "labor\n",
      "jobs 0.59846985\n",
      "mergers_acquisitions\n",
      "strategic-metal 0.25205106\n",
      "policy rates\n",
      "cpi 0.3969617\n",
      "restructuring\n",
      "housing 0.4261221\n",
      "revenue quarterly\n",
      "income 0.45236588\n",
      "retail\n",
      "retail 1.0000001\n",
      "debt\n",
      "instal-debt 0.63177705\n",
      "economy exports\n",
      "money-supply 0.37944\n",
      "shipping\n",
      "ship 0.7106267\n",
      "manufacturing\n",
      "jobs 0.48060197\n",
      "bonds\n",
      "interest 0.43741518\n",
      "indicators\n",
      "cpi 0.36446005\n",
      "trade agriculture\n",
      "livestock 0.53268754\n",
      "airlines\n",
      "jet 0.4906035\n",
      "weather\n",
      "heat 0.33420545\n",
      "relations\n",
      "interest 0.35102537\n",
      "divestiture\n",
      "jobs 0.34716272\n",
      "split\n",
      "interest 0.32740128\n",
      "earnings acquisitions\n",
      "earn 0.33341706\n",
      "discontinued\n",
      "retail 0.36432317\n",
      "mining\n",
      "fuel 0.4253512\n",
      "revenue earnings\n",
      "income 0.5739685\n",
      "finance government\n",
      "money-fx 0.49767777\n",
      "accidents\n",
      "jobs 0.41744506\n",
      "economic growth\n",
      "income 0.47945014\n",
      "media\n",
      "jobs 0.39661086\n",
      "finance record\n",
      "income 0.34613106\n",
      "investment earnings\n",
      "income 0.44439146\n",
      "inflation\n",
      "money-supply 0.58954203\n",
      "prices agriculture\n",
      "livestock 0.46125185\n",
      "acquisitions business\n",
      "retail 0.3102466\n",
      "prices opec\n",
      "money-fx 0.32969397\n",
      "takeover mergers\n",
      "strategic-metal 0.31218845\n",
      "finance stocks\n",
      "money-fx 0.39702082\n",
      "loss earnings\n",
      "earn 0.43801528\n",
      "politics agriculture\n",
      "livestock 0.51446474\n",
      "split\n",
      "interest 0.32740128\n",
      "foreign exchange\n",
      "money-fx 0.47832698\n",
      "results financial\n",
      "money-fx 0.4489707\n",
      "pricing\n",
      "retail 0.45913032\n",
      "profit sales\n",
      "retail 0.46496227\n",
      "meetings\n",
      "jobs 0.40140817\n",
      "trading\n",
      "trade 0.7527319\n",
      "budget\n",
      "money-supply 0.57203436\n",
      "financing\n",
      "interest 0.5126394\n",
      "acquisitions banking\n",
      "instal-debt 0.30512872\n",
      "debt\n",
      "instal-debt 0.63177705\n",
      "commodities agriculture\n",
      "livestock 0.5679706\n",
      "statements earnings\n",
      "income 0.47825953\n",
      "sales quarterly\n",
      "retail 0.4559718\n",
      "earnings profit\n",
      "earn 0.51135635\n",
      "quotas\n",
      "money-supply 0.423299\n",
      "losses\n",
      "heat 0.42585048\n",
      "performance\n",
      "cpu 0.48071155\n",
      "politics\n",
      "jobs 0.45481038\n",
      "payments\n",
      "money-supply 0.53160036\n",
      "earnings dividend\n",
      "earn 0.4004187\n",
      "sugar\n",
      "sugar 0.9999999\n",
      "unemployment\n",
      "jobs 0.565886\n",
      "farming\n",
      "livestock 0.5900942\n",
      "quota\n",
      "money-supply 0.4298923\n",
      "agriculture conservation\n",
      "livestock 0.53933734\n",
      "revenue profit\n",
      "income 0.5423593\n",
      "finance mergers\n",
      "money-fx 0.36641088\n",
      "surplus\n",
      "money-supply 0.5611529\n",
      "trade deficit\n",
      "trade 0.42880225\n",
      "liquidity\n",
      "money-supply 0.49846923\n",
      "natural disaster\n",
      "nat-gas 0.31070063\n",
      "subsidies\n",
      "money-supply 0.4175666\n",
      "finance interest_rates\n",
      "interest 0.5092614\n",
      "agriculture imports\n",
      "livestock 0.4647219\n",
      "earnings mergers\n",
      "income 0.32899895\n",
      "performance earnings\n",
      "income 0.41027182\n",
      "earnings taxes\n",
      "income 0.58343863\n",
      "deregulation\n",
      "cpi 0.3072612\n",
      "rates banking\n",
      "interest 0.3673181\n",
      "agreement\n",
      "acq 0.35592872\n",
      "agriculture exports\n",
      "livestock 0.46256825\n",
      "tax reform\n",
      "income 0.39545935\n",
      "regulatory\n",
      "interest 0.32159594\n",
      "economy investment\n",
      "interest 0.45006275\n",
      "stability\n",
      "reserves 0.2882573\n",
      "policy agriculture\n",
      "livestock 0.48660952\n",
      "business\n",
      "jobs 0.5904021\n",
      "housing\n",
      "housing 0.9999999\n",
      "trade currency\n",
      "money-fx 0.6184148\n",
      "acquisitions ownership\n",
      "strategic-metal 0.26404828\n",
      "meat\n",
      "carcass 0.58626497\n",
      "shipbuilding\n",
      "ship 0.61749184\n",
      "corporate governance\n",
      "strategic-metal 0.29859152\n",
      "legislation agriculture\n",
      "livestock 0.51596725\n",
      "central\n",
      "housing 0.3657829\n",
      "writedown earnings\n",
      "income 0.42682204\n",
      "protectionism\n",
      "rape-oil 0.3258156\n",
      "shareholders mergers\n",
      "strategic-metal 0.29243392\n",
      "finance currency\n",
      "money-fx 0.68486166\n",
      "changes\n",
      "heat 0.35643393\n",
      "earnings quarterly\n",
      "income 0.42343232\n",
      "acquisitions food\n",
      "meal-feed 0.4670689\n",
      "pharmaceuticals\n",
      "pet-chem 0.4420807\n",
      "dividend record\n",
      "income 0.27542722\n",
      "sugar\n",
      "sugar 0.9999999\n",
      "commodity prices\n",
      "cpi 0.38334107\n",
      "trade agreements\n",
      "trade 0.43318135\n",
      "tender offer\n",
      "trade 0.5352253\n",
      "earnings retail\n",
      "retail 0.66213286\n",
      "federal_reserve\n",
      "reserves 0.5896827\n",
      "sanctions\n",
      "rape-oil 0.3735156\n",
      "cocoa\n",
      "cocoa 1.0000001\n",
      "fraud\n",
      "retail 0.3955266\n",
      "lawsuits\n",
      "jobs 0.32784688\n",
      "mining gold\n",
      "gold 0.6516542\n",
      "acquisitions oil\n",
      "rape-oil 0.5160781\n",
      "regulation\n",
      "reserves 0.39434826\n",
      "coffee\n",
      "coffee 0.99999994\n",
      "tender offers\n",
      "trade 0.4578228\n",
      "earnings oil\n",
      "income 0.47901762\n",
      "finance banking\n",
      "money-fx 0.44615567\n",
      "opec\n",
      "oilseed 0.39506552\n",
      "legislation\n",
      "interest 0.35068893\n",
      "exchange rate\n",
      "money-fx 0.46336216\n",
      "coffee\n",
      "coffee 0.99999994\n",
      "aircraft\n",
      "jet 0.65141994\n",
      "profitability earnings\n",
      "income 0.45643467\n",
      "acquisitions technology\n",
      "strategic-metal 0.35534123\n",
      "trade imports\n",
      "trade 0.5783608\n",
      "loss revenue\n",
      "income 0.46161392\n",
      "disaster oil\n",
      "fuel 0.510424\n",
      "acquisitions negotiations\n",
      "trade 0.3432541\n",
      "trade market\n",
      "trade 0.5341217\n",
      "dividends earnings\n",
      "income 0.36892563\n",
      "coffee\n",
      "coffee 0.99999994\n",
      "soybeans\n",
      "soybean 0.9287957\n",
      "budget deficit\n",
      "money-supply 0.32219392\n",
      "energy\n",
      "heat 0.6003734\n",
      "loan\n",
      "interest 0.56841034\n",
      "exports trade\n",
      "trade 0.43615037\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "21"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "correct = 0\n",
    "for lb in rep_label:\n",
    "    query_embedding = model.encode(lb)\n",
    "    print(lb)\n",
    "    passage_embedding = model.encode(true_labels)\n",
    "    sim_scores = util.dot_score(query_embedding, passage_embedding)[0].numpy()\n",
    "    rank_list = np.argsort(sim_scores)[-1]\n",
    "    print(true_labels[rank_list], sim_scores[rank_list])\n",
    "    if sim_scores[rank_list] >= 0.60:\n",
    "        correct +=1\n",
    "correct"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.14685314685314685"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "correct/len(llama_label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Multi-Label",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
